From a024f6186d2918e8b0c6837b1e23295b8a66ff6b Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Oct 2025 18:33:11 +0000 Subject: [PATCH 001/158] jit lto interleaved scan --- .../all_cuda-129_arch-aarch64.yaml | 5 +- .../all_cuda-129_arch-x86_64.yaml | 5 +- .../all_cuda-130_arch-aarch64.yaml | 5 +- .../all_cuda-130_arch-x86_64.yaml | 5 +- .../bench_ann_cuda-129_arch-aarch64.yaml | 9 +- .../bench_ann_cuda-129_arch-x86_64.yaml | 9 +- .../bench_ann_cuda-130_arch-aarch64.yaml | 9 +- .../bench_ann_cuda-130_arch-x86_64.yaml | 9 +- .../go_cuda-129_arch-aarch64.yaml | 5 +- .../environments/go_cuda-129_arch-x86_64.yaml | 5 +- .../go_cuda-130_arch-aarch64.yaml | 5 +- .../environments/go_cuda-130_arch-x86_64.yaml | 5 +- .../rust_cuda-129_arch-aarch64.yaml | 5 +- .../rust_cuda-129_arch-x86_64.yaml | 5 +- .../rust_cuda-130_arch-aarch64.yaml | 5 +- .../rust_cuda-130_arch-x86_64.yaml | 5 +- conda/recipes/cuvs-bench/recipe.yaml | 3 + conda/recipes/libcuvs/recipe.yaml | 14 + cpp/CMakeLists.txt | 50 + .../interleaved_scan.cmake | 1300 +++++++++++++++++ cpp/cmake/modules/ConfigureCUDA.cmake | 7 +- cpp/cmake/modules/embed_fatbins.cmake | 60 + cpp/cmake/modules/generate_header.cmake | 62 + .../cuvs/detail/jit_lto/AlgorithmLauncher.h | 49 + .../cuvs/detail/jit_lto/AlgorithmPlanner.h | 38 + .../cuvs/detail/jit_lto/FragmentDatabase.h | 62 + .../cuvs/detail/jit_lto/FragmentEntry.h | 74 + .../cuvs/detail/jit_lto/MakeFragmentKey.h | 43 + .../detail/jit_lto/RegisterKernelFragment.h | 36 + cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 217 +++ cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 135 ++ cpp/src/detail/jit_lto/FragmentDatabase.cu | 50 + cpp/src/detail/jit_lto/FragmentEntry.cu | 87 ++ cpp/src/detail/jit_lto/MakeFragmentKey.cu | 28 + .../ivf_flat/ivf_flat_interleaved_scan.cuh | 84 +- .../ivf_flat/jit_lto_kernels/README.md | 87 ++ .../jit_lto_kernels/generate_kernels.py | 272 ++++ ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + 
...l_0_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...l_0_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ..._0_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ..._0_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...el_0_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...el_0_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...l_0_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...l_0_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._0_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_0_1_false_false_f_f_l_b_inner_1_id.cu | 71 + ..._0_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_0_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ..._0_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + 
..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_0_1_false_false_h_h_l_b_inner_1_id.cu | 71 + ..._0_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_0_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...0_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...nel_0_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...0_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...nel_0_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...el_0_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...el_0_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ..._0_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ..._0_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ..._0_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + ..._0_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...0_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...0_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...l_0_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + ...0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_0_1_true_false_f_f_l_b_inner_1_id.cu | 71 + ...l_0_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + ...0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_0_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ...l_0_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + ...0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_0_1_true_false_h_h_l_b_inner_1_id.cu | 71 + ...l_0_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_0_1_true_false_h_h_l_n_inner_1_id.cu | 71 + 
..._0_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_0_1_true_false_sc_i_l_b_inner_1_id.cu | 71 + ..._0_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_0_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + ...0_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...nel_0_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + ...0_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...nel_0_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + ...l_0_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ...l_0_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ...l_0_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ...l_0_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ..._0_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ..._0_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...0_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...0_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + ..._0_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...rnel_0_4_false_false_f_f_l_b_inner_4_id.cu | 71 + ..._0_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...rnel_0_4_false_false_f_f_l_n_inner_4_id.cu | 71 + ..._0_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ..._0_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ...l_0_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + ...0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...ernel_0_4_true_false_f_f_l_b_inner_4_id.cu | 71 + ...l_0_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + ...0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...ernel_0_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ...l_0_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ...l_0_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ..._0_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + 
..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...rnel_0_8_false_false_h_h_l_b_inner_8_id.cu | 71 + ..._0_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...rnel_0_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ..._0_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + ..._0_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ...l_0_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...ernel_0_8_true_false_h_h_l_b_inner_8_id.cu | 71 + ...l_0_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...ernel_0_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ...l_0_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ...l_0_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...128_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...128_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...28_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...28_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ..._128_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ..._128_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + 
...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...128_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...128_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ...28_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...el_128_1_false_false_f_f_l_b_inner_1_id.cu | 72 + ...28_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...el_128_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ...28_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...el_128_1_false_false_h_h_l_b_inner_1_id.cu | 72 + ...28_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...el_128_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...8_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...l_128_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...8_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...l_128_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ..._128_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ..._128_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ...28_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ...28_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ...28_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + 
...28_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...8_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...8_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...128_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + ...8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...nel_128_1_true_false_f_f_l_b_inner_1_id.cu | 72 + ...128_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + ...8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...nel_128_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ...128_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + ...8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...nel_128_1_true_false_h_h_l_b_inner_1_id.cu | 72 + ...128_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...nel_128_1_true_false_h_h_l_n_inner_1_id.cu | 71 + ...28_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...el_128_1_true_false_sc_i_l_b_inner_1_id.cu | 72 + ...28_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...el_128_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + ...8_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...l_128_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + ...8_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...l_128_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + ...128_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ...128_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ...128_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ...128_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ...28_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ...28_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...8_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...8_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + 
...28_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...el_128_4_false_false_f_f_l_b_inner_4_id.cu | 72 + ...28_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...el_128_4_false_false_f_f_l_n_inner_4_id.cu | 71 + ...28_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ...28_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ...128_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + ...8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...nel_128_4_true_false_f_f_l_b_inner_4_id.cu | 72 + ...128_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + ...8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...nel_128_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ...128_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ...128_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ...28_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...el_128_8_false_false_h_h_l_b_inner_8_id.cu | 72 + ...28_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...el_128_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ...28_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + ...28_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ...128_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...nel_128_8_true_false_h_h_l_b_inner_8_id.cu | 72 + ...128_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...nel_128_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ...128_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ...128_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ..._16_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + 
..._16_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...16_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...16_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...l_16_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...l_16_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ..._16_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ..._16_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ...16_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...nel_16_1_false_false_f_f_l_b_inner_1_id.cu | 72 + ...16_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...nel_16_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ...16_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...nel_16_1_false_false_h_h_l_b_inner_1_id.cu | 72 + ...16_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + 
..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...nel_16_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...6_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...el_16_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...6_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...el_16_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...l_16_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...l_16_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ...16_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ...16_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ...16_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + ...16_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...6_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...6_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ..._16_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + ...6_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_16_1_true_false_f_f_l_b_inner_1_id.cu | 71 + ..._16_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + ...6_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_16_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ..._16_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + ...6_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_16_1_true_false_h_h_l_b_inner_1_id.cu | 71 + ..._16_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...6_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_16_1_true_false_h_h_l_n_inner_1_id.cu | 71 + ...16_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...nel_16_1_true_false_sc_i_l_b_inner_1_id.cu | 72 + 
...16_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...nel_16_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + ...6_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...el_16_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + ...6_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...el_16_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + ..._16_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ..._16_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ..._16_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ..._16_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ...16_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ...16_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...6_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...6_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...16_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...nel_16_4_false_false_f_f_l_b_inner_4_id.cu | 72 + ...16_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...nel_16_4_false_false_f_f_l_n_inner_4_id.cu | 71 + ...16_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ...16_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ..._16_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + ...6_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...rnel_16_4_true_false_f_f_l_b_inner_4_id.cu | 71 + ..._16_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + ...6_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...rnel_16_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ..._16_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ..._16_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ...16_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...nel_16_8_false_false_h_h_l_b_inner_8_id.cu | 72 + ...16_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + 
..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...nel_16_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ...16_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + ...16_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ..._16_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...6_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...rnel_16_8_true_false_h_h_l_b_inner_8_id.cu | 71 + ..._16_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...6_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...rnel_16_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ..._16_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ..._16_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...l_1_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...l_1_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ..._1_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ..._1_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...el_1_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...el_1_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...l_1_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + 
...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...l_1_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._1_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_1_1_false_false_f_f_l_b_inner_1_id.cu | 71 + ..._1_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_1_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ..._1_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_1_1_false_false_h_h_l_b_inner_1_id.cu | 71 + ..._1_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_1_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...1_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...nel_1_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...1_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...nel_1_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...el_1_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...el_1_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ..._1_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ..._1_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ..._1_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + ..._1_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...1_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...1_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + 
..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...l_1_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + ...1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_1_1_true_false_f_f_l_b_inner_1_id.cu | 71 + ...l_1_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + ...1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_1_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ...l_1_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + ...1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_1_1_true_false_h_h_l_b_inner_1_id.cu | 71 + ...l_1_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_1_1_true_false_h_h_l_n_inner_1_id.cu | 71 + ..._1_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_1_1_true_false_sc_i_l_b_inner_1_id.cu | 71 + ..._1_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_1_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + ...1_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...nel_1_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + ...1_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...nel_1_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + ...l_1_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ...l_1_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ...l_1_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ...l_1_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ..._1_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ..._1_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...1_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...1_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + ..._1_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...rnel_1_4_false_false_f_f_l_b_inner_4_id.cu | 71 + 
..._1_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...rnel_1_4_false_false_f_f_l_n_inner_4_id.cu | 71 + ..._1_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ..._1_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ...l_1_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + ...1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...ernel_1_4_true_false_f_f_l_b_inner_4_id.cu | 71 + ...l_1_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + ...1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...ernel_1_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ...l_1_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ...l_1_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ..._1_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...rnel_1_8_false_false_h_h_l_b_inner_8_id.cu | 71 + ..._1_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...rnel_1_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ..._1_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + ..._1_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ...l_1_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...ernel_1_8_true_false_h_h_l_b_inner_8_id.cu | 71 + ...l_1_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...ernel_1_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ...l_1_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ...l_1_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...256_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...256_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + 
...56_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...56_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ..._256_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ..._256_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...256_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...256_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ...56_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...el_256_1_false_false_f_f_l_b_inner_1_id.cu | 72 + ...56_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...el_256_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ...56_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...el_256_1_false_false_h_h_l_b_inner_1_id.cu | 72 + ...56_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...el_256_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...6_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + 
...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...l_256_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...6_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...l_256_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ..._256_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ..._256_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ...56_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ...56_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ...56_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + ...56_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...6_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...6_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...256_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + ...6_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...nel_256_1_true_false_f_f_l_b_inner_1_id.cu | 72 + ...256_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + ...6_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...nel_256_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ...256_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + ...6_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...nel_256_1_true_false_h_h_l_b_inner_1_id.cu | 72 + ...256_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...6_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...nel_256_1_true_false_h_h_l_n_inner_1_id.cu | 71 + ...56_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...el_256_1_true_false_sc_i_l_b_inner_1_id.cu | 72 + ...56_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...el_256_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + 
...6_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...l_256_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + ...6_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...l_256_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + ...256_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ...256_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ...256_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ...256_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ...56_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ...56_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...6_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...6_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...56_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...el_256_4_false_false_f_f_l_b_inner_4_id.cu | 72 + ...56_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...el_256_4_false_false_f_f_l_n_inner_4_id.cu | 71 + ...56_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ...56_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ...256_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + ...6_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...nel_256_4_true_false_f_f_l_b_inner_4_id.cu | 72 + ...256_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + ...6_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...nel_256_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ...256_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ...256_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ...56_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...el_256_8_false_false_h_h_l_b_inner_8_id.cu | 72 + ...56_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...el_256_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ...56_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + 
...56_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ...256_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...6_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...nel_256_8_true_false_h_h_l_b_inner_8_id.cu | 72 + ...256_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...6_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...nel_256_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ...256_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ...256_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...l_2_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...l_2_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ..._2_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ..._2_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...el_2_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...el_2_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...l_2_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...l_2_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + 
..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._2_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_2_1_false_false_f_f_l_b_inner_1_id.cu | 71 + ..._2_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_2_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ..._2_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_2_1_false_false_h_h_l_b_inner_1_id.cu | 71 + ..._2_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_2_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...2_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...nel_2_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...2_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...nel_2_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...el_2_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...el_2_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ..._2_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ..._2_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ..._2_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + ..._2_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...2_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...2_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...l_2_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + 
...2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_2_1_true_false_f_f_l_b_inner_1_id.cu | 71 + ...l_2_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + ...2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_2_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ...l_2_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + ...2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_2_1_true_false_h_h_l_b_inner_1_id.cu | 71 + ...l_2_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_2_1_true_false_h_h_l_n_inner_1_id.cu | 71 + ..._2_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_2_1_true_false_sc_i_l_b_inner_1_id.cu | 71 + ..._2_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_2_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + ...2_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...nel_2_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + ...2_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...nel_2_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + ...l_2_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ...l_2_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ...l_2_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ...l_2_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ..._2_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ..._2_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...2_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...2_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + ..._2_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...rnel_2_4_false_false_f_f_l_b_inner_4_id.cu | 71 + ..._2_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...rnel_2_4_false_false_f_f_l_n_inner_4_id.cu | 71 + 
..._2_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ..._2_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ...l_2_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + ...2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...ernel_2_4_true_false_f_f_l_b_inner_4_id.cu | 71 + ...l_2_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + ...2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...ernel_2_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ...l_2_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ...l_2_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ..._2_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...rnel_2_8_false_false_h_h_l_b_inner_8_id.cu | 71 + ..._2_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...rnel_2_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ..._2_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + ..._2_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ...l_2_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...ernel_2_8_true_false_h_h_l_b_inner_8_id.cu | 71 + ...l_2_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...ernel_2_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ...l_2_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ...l_2_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ..._32_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ..._32_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...32_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + 
...32_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...l_32_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...l_32_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ..._32_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ..._32_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ...32_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...nel_32_1_false_false_f_f_l_b_inner_1_id.cu | 72 + ...32_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...nel_32_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ...32_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...nel_32_1_false_false_h_h_l_b_inner_1_id.cu | 72 + ...32_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...nel_32_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...2_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...el_32_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...2_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + 
...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...el_32_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...l_32_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...l_32_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ...32_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ...32_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ...32_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + ...32_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...2_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...2_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ..._32_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + ...2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_32_1_true_false_f_f_l_b_inner_1_id.cu | 71 + ..._32_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + ...2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_32_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ..._32_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + ...2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_32_1_true_false_h_h_l_b_inner_1_id.cu | 71 + ..._32_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_32_1_true_false_h_h_l_n_inner_1_id.cu | 71 + ...32_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...nel_32_1_true_false_sc_i_l_b_inner_1_id.cu | 72 + ...32_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...nel_32_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + ...2_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...el_32_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + 
...2_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...el_32_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + ..._32_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ..._32_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ..._32_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ..._32_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ...32_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ...32_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...2_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...2_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...32_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...nel_32_4_false_false_f_f_l_b_inner_4_id.cu | 72 + ...32_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...nel_32_4_false_false_f_f_l_n_inner_4_id.cu | 71 + ...32_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ...32_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ..._32_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + ...2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...rnel_32_4_true_false_f_f_l_b_inner_4_id.cu | 71 + ..._32_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + ...2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...rnel_32_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ..._32_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ..._32_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ...32_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...nel_32_8_false_false_h_h_l_b_inner_8_id.cu | 72 + ...32_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...nel_32_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ...32_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + ...32_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ..._32_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + 
...rnel_32_8_true_false_h_h_l_b_inner_8_id.cu | 71 + ..._32_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...rnel_32_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ..._32_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ..._32_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...l_4_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...l_4_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ..._4_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ..._4_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...el_4_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...el_4_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...l_4_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...l_4_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + 
...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._4_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_4_1_false_false_f_f_l_b_inner_1_id.cu | 71 + ..._4_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_4_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ..._4_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_4_1_false_false_h_h_l_b_inner_1_id.cu | 71 + ..._4_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_4_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...4_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...nel_4_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...4_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...nel_4_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...el_4_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...el_4_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ..._4_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ..._4_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ..._4_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + ..._4_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...4_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...4_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...l_4_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + ...4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_4_1_true_false_f_f_l_b_inner_1_id.cu | 71 + ...l_4_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + 
...4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_4_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ...l_4_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + ...4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_4_1_true_false_h_h_l_b_inner_1_id.cu | 71 + ...l_4_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_4_1_true_false_h_h_l_n_inner_1_id.cu | 71 + ..._4_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_4_1_true_false_sc_i_l_b_inner_1_id.cu | 71 + ..._4_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_4_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + ...4_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...nel_4_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + ...4_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...nel_4_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + ...l_4_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ...l_4_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ...l_4_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ...l_4_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ..._4_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ..._4_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...4_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...4_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + ..._4_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...rnel_4_4_false_false_f_f_l_b_inner_4_id.cu | 71 + ..._4_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...rnel_4_4_false_false_f_f_l_n_inner_4_id.cu | 71 + ..._4_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ..._4_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ...l_4_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + 
...4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...ernel_4_4_true_false_f_f_l_b_inner_4_id.cu | 71 + ...l_4_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + ...4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...ernel_4_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ...l_4_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ...l_4_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ..._4_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...rnel_4_8_false_false_h_h_l_b_inner_8_id.cu | 71 + ..._4_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...rnel_4_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ..._4_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + ..._4_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ...l_4_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...ernel_4_8_true_false_h_h_l_b_inner_8_id.cu | 71 + ...l_4_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...ernel_4_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ...l_4_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ...l_4_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ..._64_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ..._64_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...64_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...64_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + 
...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...l_64_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...l_64_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ..._64_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ..._64_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ...64_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...nel_64_1_false_false_f_f_l_b_inner_1_id.cu | 72 + ...64_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...nel_64_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ...64_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...nel_64_1_false_false_h_h_l_b_inner_1_id.cu | 72 + ...64_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...nel_64_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...4_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...el_64_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...4_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...el_64_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + 
..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...l_64_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...l_64_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ...64_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ...64_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ...64_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + ...64_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...4_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...4_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ..._64_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + ...4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_64_1_true_false_f_f_l_b_inner_1_id.cu | 71 + ..._64_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + ...4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_64_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ..._64_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + ...4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_64_1_true_false_h_h_l_b_inner_1_id.cu | 71 + ..._64_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_64_1_true_false_h_h_l_n_inner_1_id.cu | 71 + ...64_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...nel_64_1_true_false_sc_i_l_b_inner_1_id.cu | 72 + ...64_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...nel_64_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + ...4_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...el_64_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + ...4_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...el_64_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + 
..._64_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ..._64_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ..._64_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ..._64_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ...64_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ...64_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...4_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...4_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...64_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...nel_64_4_false_false_f_f_l_b_inner_4_id.cu | 72 + ...64_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...nel_64_4_false_false_f_f_l_n_inner_4_id.cu | 71 + ...64_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ...64_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ..._64_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + ...4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...rnel_64_4_true_false_f_f_l_b_inner_4_id.cu | 71 + ..._64_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + ...4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...rnel_64_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ..._64_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ..._64_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ...64_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...nel_64_8_false_false_h_h_l_b_inner_8_id.cu | 72 + ...64_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...nel_64_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ...64_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + ...64_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ..._64_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...rnel_64_8_true_false_h_h_l_b_inner_8_id.cu | 71 + ..._64_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + 
...rnel_64_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ..._64_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ..._64_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 + ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...l_8_16_false_false_sc_i_l_b_inner_16_id.cu | 72 + ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 + ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...l_8_16_false_false_sc_i_l_n_inner_16_id.cu | 71 + ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ..._8_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 + ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ..._8_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 + ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 + ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 + ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 + ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 + ...el_8_16_true_false_sc_i_l_b_inner_16_id.cu | 72 + ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 + ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 + ...el_8_16_true_false_sc_i_l_n_inner_16_id.cu | 71 + ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 + ...l_8_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 + ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 + ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 + ...l_8_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 + ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 + ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 + ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 + ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 + ..._8_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + 
...rnel_8_1_false_false_f_f_l_b_inner_1_id.cu | 71 + ..._8_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_8_1_false_false_f_f_l_n_inner_1_id.cu | 71 + ..._8_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 + ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_8_1_false_false_h_h_l_b_inner_1_id.cu | 71 + ..._8_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 + ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_8_1_false_false_h_h_l_n_inner_1_id.cu | 71 + ...8_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 + ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...nel_8_1_false_false_sc_i_l_b_inner_1_id.cu | 72 + ...8_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 + ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...nel_8_1_false_false_sc_i_l_n_inner_1_id.cu | 71 + ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...el_8_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 + ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...el_8_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 + ..._8_1_false_true_f_f_l_b_inner_1_compose.cu | 75 + ..._8_1_false_true_f_f_l_n_inner_1_compose.cu | 74 + ..._8_1_false_true_h_h_l_b_inner_1_compose.cu | 75 + ..._8_1_false_true_h_h_l_n_inner_1_compose.cu | 74 + ...8_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 + ...8_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 + ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 + ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 + ...l_8_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 + ...8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_8_1_true_false_f_f_l_b_inner_1_id.cu | 71 + ...l_8_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 + ...8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_8_1_true_false_f_f_l_n_inner_1_id.cu | 71 + ...l_8_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 + 
...8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 + ...ernel_8_1_true_false_h_h_l_b_inner_1_id.cu | 71 + ...l_8_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 + ...8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 + ...ernel_8_1_true_false_h_h_l_n_inner_1_id.cu | 71 + ..._8_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 + ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 + ...rnel_8_1_true_false_sc_i_l_b_inner_1_id.cu | 71 + ..._8_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 + ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 + ...rnel_8_1_true_false_sc_i_l_n_inner_1_id.cu | 71 + ...8_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 + ...nel_8_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 + ...8_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 + ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 + ...nel_8_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 + ...l_8_1_true_true_f_f_l_b_inner_1_compose.cu | 75 + ...l_8_1_true_true_f_f_l_n_inner_1_compose.cu | 74 + ...l_8_1_true_true_h_h_l_b_inner_1_compose.cu | 75 + ...l_8_1_true_true_h_h_l_n_inner_1_compose.cu | 74 + ..._8_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 + ..._8_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 + ...8_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 + ...8_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 + ..._8_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 + ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...rnel_8_4_false_false_f_f_l_b_inner_4_id.cu | 71 + ..._8_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 + ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...rnel_8_4_false_false_f_f_l_n_inner_4_id.cu | 71 + ..._8_4_false_true_f_f_l_b_inner_4_compose.cu | 75 + ..._8_4_false_true_f_f_l_n_inner_4_compose.cu | 74 + ...l_8_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 + ...8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 + ...ernel_8_4_true_false_f_f_l_b_inner_4_id.cu | 71 + ...l_8_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 + 
...8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 + ...ernel_8_4_true_false_f_f_l_n_inner_4_id.cu | 71 + ...l_8_4_true_true_f_f_l_b_inner_4_compose.cu | 75 + ...l_8_4_true_true_f_f_l_n_inner_4_compose.cu | 74 + ..._8_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 + ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...rnel_8_8_false_false_h_h_l_b_inner_8_id.cu | 71 + ..._8_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 + ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...rnel_8_8_false_false_h_h_l_n_inner_8_id.cu | 71 + ..._8_8_false_true_h_h_l_b_inner_8_compose.cu | 75 + ..._8_8_false_true_h_h_l_n_inner_8_compose.cu | 74 + ...l_8_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 + ...8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 + ...ernel_8_8_true_false_h_h_l_b_inner_8_id.cu | 71 + ...l_8_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 + ...8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 + ...ernel_8_8_true_false_h_h_l_n_inner_8_id.cu | 71 + ...l_8_8_true_true_h_h_l_b_inner_8_compose.cu | 75 + ...l_8_8_true_true_h_h_l_n_inner_8_compose.cu | 74 + .../interleaved_scan_kernels.txt | 1280 ++++++++++++++++ .../interleaved_scan_planner.hpp | 36 + dependencies.yaml | 79 +- python/cuvs/pyproject.toml | 12 +- python/cuvs_bench/pyproject.toml | 2 +- python/libcuvs/pyproject.toml | 15 +- 1323 files changed, 96755 insertions(+), 106 deletions(-) create mode 100644 cpp/cmake/jit_lto_kernels_list/interleaved_scan.cmake create mode 100644 cpp/cmake/modules/embed_fatbins.cmake create mode 100644 cpp/cmake/modules/generate_header.cmake create mode 100644 cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h create mode 100644 cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h create mode 100644 cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h create mode 100644 cpp/include/cuvs/detail/jit_lto/FragmentEntry.h create mode 100644 cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h create mode 100644 cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h create mode 100644 
cpp/src/detail/jit_lto/AlgorithmLauncher.cu create mode 100644 cpp/src/detail/jit_lto/AlgorithmPlanner.cu create mode 100644 cpp/src/detail/jit_lto/FragmentDatabase.cu create mode 100644 cpp/src/detail/jit_lto/FragmentEntry.cu create mode 100644 cpp/src/detail/jit_lto/MakeFragmentKey.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu create mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index af56842a11..257c91c8b4 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -31,7 +31,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==25.12.*,>=0.0.0a0 +- libnvjitlink-dev +- librmm==25.10.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -39,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.10.*,>=0.0.0a0 - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0.dev0 diff --git 
a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 44739ffd6a..ec6d0fb958 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -31,7 +31,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==25.12.*,>=0.0.0a0 +- libnvjitlink-dev +- librmm==25.10.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -39,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.10.*,>=0.0.0a0 - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0.dev0 diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 3a81210868..7b802b2309 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -31,7 +31,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==25.12.*,>=0.0.0a0 +- libnvjitlink-dev +- librmm==25.10.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -39,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.10.*,>=0.0.0a0 - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0.dev0 diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 2e93cf1489..4d02d49918 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -31,7 +31,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==25.12.*,>=0.0.0a0 +- libnvjitlink-dev +- librmm==25.10.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -39,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.10.*,>=0.0.0a0 - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0.dev0 diff --git a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml 
b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml index 5fe174ab6a..0126cd44b3 100644 --- a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=12.9.2,<13.0a0 - cuda-version=12.9 - cupy>=13.6.0 -- cuvs==25.12.*,>=0.0.0a0 +- cuvs==25.10.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -30,15 +30,16 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- librmm==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- librmm==25.10.*,>=0.0.0a0 - matplotlib-base>=3.9 - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.10.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.4.0,<0.5.0.dev0 - requests diff --git a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml index 098e1765fa..33aa1eaf78 100644 --- a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=12.9.2,<13.0a0 - cuda-version=12.9 - cupy>=13.6.0 -- cuvs==25.12.*,>=0.0.0a0 +- cuvs==25.10.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -32,8 +32,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- librmm==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- librmm==25.10.*,>=0.0.0a0 - matplotlib-base>=3.9 - mkl-devel=2023 - nccl>=2.19 @@ -41,7 +42,7 @@ dependencies: - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.10.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.4.0,<0.5.0.dev0 - requests diff --git a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml index 3a54fecedb..639cffd860 100644 --- 
a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=13.0.1,<14.0a0 - cuda-version=13.0 - cupy>=13.6.0 -- cuvs==25.12.*,>=0.0.0a0 +- cuvs==25.10.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -30,15 +30,16 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- librmm==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- librmm==25.10.*,>=0.0.0a0 - matplotlib-base>=3.9 - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.10.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.4.0,<0.5.0.dev0 - requests diff --git a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml index 95aa4d7a13..88fa65f162 100644 --- a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=13.0.1,<14.0a0 - cuda-version=13.0 - cupy>=13.6.0 -- cuvs==25.12.*,>=0.0.0a0 +- cuvs==25.10.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -32,8 +32,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- librmm==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- librmm==25.10.*,>=0.0.0a0 - matplotlib-base>=3.9 - mkl-devel=2023 - nccl>=2.19 @@ -41,7 +42,7 @@ dependencies: - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.10.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.4.0,<0.5.0.dev0 - requests diff --git a/conda/environments/go_cuda-129_arch-aarch64.yaml b/conda/environments/go_cuda-129_arch-aarch64.yaml index b8bf557877..2cd0bebe3a 100644 --- a/conda/environments/go_cuda-129_arch-aarch64.yaml +++ b/conda/environments/go_cuda-129_arch-aarch64.yaml @@ -24,8 +24,9 @@ 
dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- libraft==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- libraft==25.10.*,>=0.0.0a0 - nccl>=2.19 - ninja - sysroot_linux-aarch64==2.28 diff --git a/conda/environments/go_cuda-129_arch-x86_64.yaml b/conda/environments/go_cuda-129_arch-x86_64.yaml index adc12d644b..c14dab9cbe 100644 --- a/conda/environments/go_cuda-129_arch-x86_64.yaml +++ b/conda/environments/go_cuda-129_arch-x86_64.yaml @@ -24,8 +24,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- libraft==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- libraft==25.10.*,>=0.0.0a0 - nccl>=2.19 - ninja - sysroot_linux-64==2.28 diff --git a/conda/environments/go_cuda-130_arch-aarch64.yaml b/conda/environments/go_cuda-130_arch-aarch64.yaml index ca450a317c..1edc9e74e7 100644 --- a/conda/environments/go_cuda-130_arch-aarch64.yaml +++ b/conda/environments/go_cuda-130_arch-aarch64.yaml @@ -24,8 +24,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- libraft==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- libraft==25.10.*,>=0.0.0a0 - nccl>=2.19 - ninja - sysroot_linux-aarch64==2.28 diff --git a/conda/environments/go_cuda-130_arch-x86_64.yaml b/conda/environments/go_cuda-130_arch-x86_64.yaml index 5873836633..d4f5d1cd10 100644 --- a/conda/environments/go_cuda-130_arch-x86_64.yaml +++ b/conda/environments/go_cuda-130_arch-x86_64.yaml @@ -24,8 +24,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- libraft==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- libraft==25.10.*,>=0.0.0a0 - nccl>=2.19 - ninja - sysroot_linux-64==2.28 diff --git a/conda/environments/rust_cuda-129_arch-aarch64.yaml b/conda/environments/rust_cuda-129_arch-aarch64.yaml index 28d7701d68..01a95cc416 100644 --- 
a/conda/environments/rust_cuda-129_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-129_arch-aarch64.yaml @@ -21,8 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- libraft==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- libraft==25.10.*,>=0.0.0a0 - make - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-129_arch-x86_64.yaml b/conda/environments/rust_cuda-129_arch-x86_64.yaml index a21932185b..4479cf8038 100644 --- a/conda/environments/rust_cuda-129_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-129_arch-x86_64.yaml @@ -21,8 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- libraft==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- libraft==25.10.*,>=0.0.0a0 - make - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-130_arch-aarch64.yaml b/conda/environments/rust_cuda-130_arch-aarch64.yaml index 7533f45e23..4049a2c5e1 100644 --- a/conda/environments/rust_cuda-130_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-130_arch-aarch64.yaml @@ -21,8 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- libraft==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- libraft==25.10.*,>=0.0.0a0 - make - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-130_arch-x86_64.yaml b/conda/environments/rust_cuda-130_arch-x86_64.yaml index 0b4dbd7b09..142033b23a 100644 --- a/conda/environments/rust_cuda-130_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-130_arch-x86_64.yaml @@ -21,8 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.12.*,>=0.0.0a0 -- libraft==25.12.*,>=0.0.0a0 +- libcuvs==25.10.*,>=0.0.0a0 +- libnvjitlink-dev +- libraft==25.10.*,>=0.0.0a0 - make - nccl>=2.19 - ninja diff --git a/conda/recipes/cuvs-bench/recipe.yaml b/conda/recipes/cuvs-bench/recipe.yaml 
index fe12bea9ae..2e0e750701 100644 --- a/conda/recipes/cuvs-bench/recipe.yaml +++ b/conda/recipes/cuvs-bench/recipe.yaml @@ -30,6 +30,7 @@ requirements: - cuda-version =${{ cuda_version }} - libcuvs =${{ version }} - libcuvs-bench-ann =${{ version }} + - libnvjitlink-dev - python =${{ py_version }} - pip - rapids-build-backend >=0.4.0,<0.5.0.dev0 @@ -46,6 +47,7 @@ requirements: - h5py ${{ h5py_version }} - libcublas - libcuvs-bench-ann =${{ version }} + - libnvjitlink-dev - matplotlib-base>=3.9 - pandas - pylibraft =${{ minor_version }} @@ -65,6 +67,7 @@ requirements: - libaio - libboost - libcublas + - libnvjitlink-dev - mkl tests: diff --git a/conda/recipes/libcuvs/recipe.yaml b/conda/recipes/libcuvs/recipe.yaml index cd5f47050e..272e44db27 100644 --- a/conda/recipes/libcuvs/recipe.yaml +++ b/conda/recipes/libcuvs/recipe.yaml @@ -58,6 +58,7 @@ cache: - ninja - ${{ stdlib("c") }} host: + - libnvjitlink-dev - librmm =${{ minor_version }} - libraft-headers =${{ minor_version }} - nccl ${{ nccl_version }} @@ -96,6 +97,7 @@ outputs: - cmake ${{ cmake_version }} - ${{ stdlib("c") }} host: + - libnvjitlink-dev - librmm =${{ minor_version }} - libraft-headers =${{ minor_version }} - nccl ${{ nccl_version }} @@ -108,6 +110,7 @@ outputs: - libcusparse-dev run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} + - libnvjitlink-dev - libraft-headers =${{ minor_version }} - nccl - cuda-cudart @@ -125,6 +128,8 @@ outputs: - libcurand - libcusolver - libcusparse + - libcuda.so.1 + - libnvjitlink-dev - librmm - mkl - nccl @@ -148,6 +153,7 @@ outputs: - cmake ${{ cmake_version }} - ${{ stdlib("c") }} host: + - libnvjitlink-dev - librmm =${{ minor_version }} - libraft-headers =${{ minor_version }} - nccl ${{ nccl_version }} @@ -163,6 +169,7 @@ outputs: run: - ${{ pin_subpackage("libcuvs", exact=True) }} - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} + - libnvjitlink-dev - libraft-headers =${{ minor_version }} - nccl - cuda-cudart 
@@ -180,6 +187,8 @@ outputs: - libcurand - libcusolver - libcusparse + - libcuda.so.1 + - libnvjitlink-dev - librmm - mkl - nccl @@ -238,6 +247,7 @@ outputs: host: - ${{ pin_subpackage("libcuvs", exact=True) }} - cuda-version =${{ cuda_version }} + - libnvjitlink-dev - libraft-headers =${{ minor_version }} - librmm =${{ minor_version }} - nccl ${{ nccl_version }} @@ -266,6 +276,7 @@ outputs: - libcurand - libcusolver - libcusparse + - libcuda.so.1 - librmm - mkl - nccl @@ -306,6 +317,7 @@ outputs: - libcusolver-dev - libcusparse-dev - libgomp + - libnvjitlink-dev - libraft-headers =${{ minor_version }} - librmm =${{ minor_version }} - nccl ${{ nccl_version }} @@ -323,6 +335,7 @@ outputs: - libcurand - libcusolver - libcusparse + - libnvjitlink-dev - libraft-headers =${{ minor_version }} - nccl - if: linux64 @@ -335,6 +348,7 @@ outputs: - libcurand - libcusolver - libcusparse + - libnvjitlink-dev - librmm - mkl - nccl diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d68c8df37a..8d176a595e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -215,6 +215,10 @@ if(BUILD_CAGRA_HNSWLIB) include(cmake/thirdparty/get_hnswlib.cmake) endif() +# this is needed to embed fatbins to JIT at runtime +include(cmake/modules/embed_fatbins.cmake) +include(cmake/jit_lto_kernels_list/interleaved_scan.cmake) + # ################################################################################################## # * cuvs --------------------------------------------------------------------- if(NOT BUILD_CPU_ONLY) @@ -327,6 +331,13 @@ if(NOT BUILD_CPU_ONLY) ) endif() + set(JIT_LTO_FILES + src/detail/jit_lto/AlgorithmLauncher.cu + src/detail/jit_lto/FragmentDatabase.cu + src/detail/jit_lto/FragmentEntry.cu + src/detail/jit_lto/MakeFragmentKey.cu + ) + add_library( cuvs_objs OBJECT src/cluster/kmeans_balanced_fit_float.cu @@ -522,6 +533,7 @@ if(NOT BUILD_CPU_ONLY) src/stats/silhouette_score.cu src/stats/trustworthiness_score.cu ${CUVS_MG_ALGOS} + ${JIT_LTO_FILES} ) 
set_target_properties( @@ -555,6 +567,42 @@ if(NOT BUILD_CPU_ONLY) INTERFACE "$" ) + add_library( + jit_lto_fatbins OBJECT + ${INTERLEAVED_SCAN_KERNEL_FILES} + ) + + target_compile_definitions(jit_lto_fatbins PRIVATE BUILD_KERNEL) + target_include_directories(jit_lto_fatbins PRIVATE "$") + target_compile_options(jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size) + target_compile_options( + jit_lto_fatbins PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + ) + set_target_properties(jit_lto_fatbins PROPERTIES + CUDA_ARCHITECTURES "75-real" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + CUDA_SEPARABLE_COMPILATION ON + CUDA_FATBIN_COMPILATION ON + POSITION_INDEPENDENT_CODE ON + INTERPROCEDURAL_OPTIMIZATION ON) + target_link_libraries(jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) + + add_library(jit_lto_fatbins_as_cpp_sources STATIC + src/detail/jit_lto/AlgorithmPlanner.cu + ) + target_include_directories(jit_lto_fatbins_as_cpp_sources PRIVATE "$") + target_compile_options( + jit_lto_fatbins_as_cpp_sources PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + ) + target_link_libraries(jit_lto_fatbins_as_cpp_sources PRIVATE CUDA::cuda_driver rmm::rmm raft::raft CCCL::CCCL) + + embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) + # Endian detection include(TestBigEndian) test_big_endian(BIG_ENDIAN) @@ -627,6 +675,8 @@ if(NOT BUILD_CPU_ONLY) $<$:CUDA::nvtx3> PRIVATE nvidia::cutlass::cutlass $ cuvs-cagra-search + $ + CUDA::nvJitLink ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries diff --git a/cpp/cmake/jit_lto_kernels_list/interleaved_scan.cmake b/cpp/cmake/jit_lto_kernels_list/interleaved_scan.cmake new file mode 100644 index 0000000000..9c3bc41e8f --- /dev/null +++ b/cpp/cmake/jit_lto_kernels_list/interleaved_scan.cmake @@ -0,0 +1,1300 @@ +# ============================================================================= +# Copyright 
(c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + + +# Auto-generated list of interleaved scan kernel files +# Generated by generate_kernels.py + +set(INTERLEAVED_SCAN_KERNEL_FILES + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu + 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu + src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu +) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index d8576cd650..5ce9b918b4 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -19,11 +19,6 @@ if(DISABLE_DEPRECATION_WARNINGS) ) endif() -if(DISABLE_OPENMP) - list(APPEND CUVS_CXX_FLAGS -Wno-unknown-pragmas) - list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wno-unknown-pragmas) -endif() - # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with # clang) if(CMAKE_COMPILER_IS_GNUCXX) @@ -69,6 +64,6 @@ endif() # Debug options if(CMAKE_BUILD_TYPE MATCHES Debug) message(VERBOSE "cuVS: Building with debugging flags") - list(APPEND CUVS_CUDA_FLAGS -G 
-Xcompiler=-rdynamic --maxrregcount=64) + list(APPEND CUVS_CUDA_FLAGS -G -Xcompiler=-rdynamic) list(APPEND CUVS_CUDA_FLAGS -Xptxas --suppress-stack-size-warning) endif() diff --git a/cpp/cmake/modules/embed_fatbins.cmake b/cpp/cmake/modules/embed_fatbins.cmake new file mode 100644 index 0000000000..d7573db36b --- /dev/null +++ b/cpp/cmake/modules/embed_fatbins.cmake @@ -0,0 +1,60 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# embed_fatbins(library_name kernel_target): convert the objects of `kernel_target` to C headers with bin2c, then add the sources of `kernel_target` and the generated header directory to `library_name` +function(embed_fatbins library_name kernel_target) + find_package(CUDAToolkit REQUIRED) + find_program(bin_to_c + NAMES bin2c + PATHS ${CUDAToolkit_BIN_DIR} + ) + + set(output_dir ${CMAKE_CURRENT_BINARY_DIR}/${library_name}) + + # Create a response file to avoid "argument list too long" errors + set(objects_response_file ${CMAKE_CURRENT_BINARY_DIR}/${library_name}_objects.rsp) + + # Write the objects list, one path per line, to a response file using file(GENERATE) which handles generator expressions + file(GENERATE + OUTPUT "${objects_response_file}" + CONTENT "$<JOIN:$<TARGET_OBJECTS:${kernel_target}>,\n>\n" + ) + + # Generate individual headers for each FATBIN object by running generate_header.cmake in script mode; re-runs when the response file or any object of `kernel_target` changes + add_custom_command( + OUTPUT "${output_dir}/headers_generated.stamp" + COMMAND ${CMAKE_COMMAND} + "-DBIN_TO_C_COMMAND=${bin_to_c}" + "-DOBJECTS_RESPONSE_FILE=${objects_response_file}" + "-DOUTPUT_DIR=${output_dir}" + "-DSTAMP_FILE=${output_dir}/headers_generated.stamp" + -P ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/generate_header.cmake + VERBATIM + DEPENDS "${objects_response_file}" $<TARGET_OBJECTS:${kernel_target}> + COMMENT "Converting FATBIN kernels to individual C++ headers" + ) + + # get the sources of `kernel_target` and add them as CUDA + # sources so we re-compile them to get the inline registration logic + get_target_property(output_sources ${kernel_target} SOURCES) + + # add those c++ sources to `library_name`, together with the stamp file so the header-generation command above is part of its build + target_sources(${library_name} + PRIVATE + "${output_dir}/headers_generated.stamp" + ${output_sources} + ) + target_compile_features(${library_name} PRIVATE cxx_std_20) + target_include_directories(${library_name} PRIVATE ${output_dir}) +endfunction() diff --git a/cpp/cmake/modules/generate_header.cmake b/cpp/cmake/modules/generate_header.cmake new file mode 100644 index 0000000000..f9f3e09439 --- /dev/null +++ b/cpp/cmake/modules/generate_header.cmake @@ -0,0 +1,62 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Read objects from response file to avoid argument length issues +if(DEFINED OBJECTS_RESPONSE_FILE) + file(READ "${OBJECTS_RESPONSE_FILE}" objects_content) + string(STRIP "${objects_content}" objects_content) + # Split by newlines since we joined with \n in the CMake file + string(REPLACE "\n" ";" objects_list "${objects_content}") +else() + # Fallback to direct objects (for backward compatibility) + set(objects_list "${OBJECTS}") +endif() + +# Create output directory if it doesn't exist +file(MAKE_DIRECTORY "${OUTPUT_DIR}") + +set(generated_headers) +foreach(obj ${objects_list}) + # Skip empty entries + if(NOT obj STREQUAL "") + get_filename_component(obj_ext ${obj} EXT) + get_filename_component(obj_name ${obj} NAME_WE) + get_filename_component(obj_dir ${obj} DIRECTORY) + + if(obj_ext MATCHES ".fatbin") + # Generate individual header file for this FATBIN + set(header_file "${OUTPUT_DIR}/${obj_name}.h") + + set(args -c -p 0x0 --name embedded_${obj_name} ${obj}) + execute_process(COMMAND "${BIN_TO_C_COMMAND}" ${args} + WORKING_DIRECTORY ${obj_dir} + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error_var + ) + if(NOT result EQUAL 0) + message(FATAL_ERROR "Failed to process ${obj}: ${error_var}") + endif() + + # Write individual header file + file(WRITE "${header_file}" "${output}") + list(APPEND generated_headers 
"${header_file}") + endif() + endif() +endforeach() + +# Create a stamp file to indicate completion +file(WRITE "${STAMP_FILE}" "Headers generated: ${generated_headers}") +list(LENGTH generated_headers num_headers) +message(STATUS "Generated ${num_headers} individual FATBIN headers") diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h new file mode 100644 index 0000000000..d567d6e138 --- /dev/null +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +struct AlgorithmLauncher { + AlgorithmLauncher() = default; + + AlgorithmLauncher(CUlibrary l, CUkernel k); + + template + void operator()( + cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, Args&&... 
args) + { + void* kernel_args[] = {const_cast(static_cast(&args))...}; + this->call(stream, grid, block, shared_mem, kernel_args); + } + + CUkernel get_kernel() { return this->kernel; } + + private: + void call(cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** args); + CUlibrary library; + CUkernel kernel; +}; + +std::unordered_map& get_cached_launchers(); diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h new file mode 100644 index 0000000000..83e8e86adf --- /dev/null +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include "AlgorithmLauncher.h" + +struct FragmentEntry; + +struct AlgorithmPlanner { + AlgorithmPlanner(std::string const& n, std::string const& p) : name(n), params(p) {} + + AlgorithmLauncher get_launcher(); + + std::string name; + std::string params; + std::vector fragments; + + private: + void save_compute(); + AlgorithmLauncher build(); +}; diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h new file mode 100644 index 0000000000..297fbf662d --- /dev/null +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include "FragmentEntry.h" +#include "MakeFragmentKey.h" + +struct NRTCLTOFragmentCompiler; + +// struct PerEntryCachedInfo { +// std::unordered_set, FragmentEntryHash, +// FragmentEntryEqual> +// entries; +// }; + +class FragmentDatabase { + public: + FragmentDatabase(FragmentDatabase const&) = delete; + FragmentDatabase(FragmentDatabase&&) = delete; + + FragmentDatabase& operator=(FragmentDatabase&&) = delete; + FragmentDatabase& operator=(FragmentDatabase const&) = delete; + + std::unordered_map> cache; + + private: + FragmentDatabase(); + + bool make_cache_entry(std::string const& name, std::string const& params); + + friend FragmentDatabase& fragment_database(); + + friend void registerFatbinFragment(std::string const& algo, + std::string const& params, + unsigned char const* blob, + std::size_t size); +}; + +FragmentDatabase& fragment_database(); + +void registerFatbinFragment(std::string const& algo, + std::string const& params, + unsigned char const* blob, + std::size_t size); diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h new file mode 100644 index 0000000000..d34229073b --- /dev/null +++ b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +struct FragmentEntry { + FragmentEntry(std::string const& params); + + bool operator==(const FragmentEntry& rhs) const { return compute_key == rhs.compute_key; } + + virtual bool add_to(nvJitLinkHandle& handle) const = 0; + + // std::size_t compute_arg_count = 0; //optimization for equality checks + std::string compute_key{}; +}; + +// struct FragmentEntryHash { +// using is_transparent = void; + +// std::size_t operator()(std::unique_ptr const& entry) const noexcept { +// return std::hash{}(entry->compute_key); +// } +// std::size_t operator()(FragmentEntry const* entry) const noexcept { +// return std::hash{}(entry->compute_key); +// } +// std::size_t operator()(std::vector const& params) const noexcept; +// }; + +// struct FragmentEntryEqual { +// using is_transparent = void; + +// template +// bool operator()(T const& t, U const& u) const { +// return std::to_address(t) == std::to_address(u); +// } + +// bool operator()(std::unique_ptr const& entry, +// std::string const& params) const noexcept +// { +// return this->operator()(params, entry); +// } + +// bool operator()(std::string const& params, +// std::unique_ptr const& entry) const noexcept; +// }; + +struct FatbinFragmentEntry final : FragmentEntry { + FatbinFragmentEntry(std::string const& params, unsigned char const* view, std::size_t size); + + virtual bool 
add_to(nvJitLinkHandle& handle) const; + + std::size_t data_size = 0; + unsigned char const* data_view = nullptr; +}; diff --git a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h new file mode 100644 index 0000000000..e35c5c6c62 --- /dev/null +++ b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h @@ -0,0 +1,43 @@ + +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace detail { +std::string nvrtc_name(std::type_info const& info); + +template +std::string type_as_string() +{ + if constexpr (std::is_reference_v) { + return std::string(typeid(T).name()) + "&"; + } else { + return std::string(typeid(T).name()); + } +} +} // namespace detail + +template +std::string make_fragment_key() +{ + std::string result; + ((result += detail::type_as_string() + "_"), ...); + return result; +} diff --git a/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h b/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h new file mode 100644 index 0000000000..560d09dc1d --- /dev/null +++ b/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h @@ -0,0 +1,36 @@ + +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "MakeFragmentKey.h" + +void registerFatbinFragment(std::string const& algo, + std::string const& params, + unsigned char const* blob, + std::size_t size); + +namespace { + +template +void registerAlgorithm(std::string algo, unsigned char const* blob, std::size_t size) +{ + auto key = make_fragment_key(); + registerFatbinFragment(algo, key, blob, size); +} + +} // namespace diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu new file mode 100644 index 0000000000..c2ac186467 --- /dev/null +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+// NOTE(review): the include targets were not legible in the reviewed text. The first is assumed to
+// be this file's own header; a second, unidentified include followed it -- re-add it if required.
+#include <cuvs/detail/jit_lto/AlgorithmLauncher.h>
+
+#include <stdexcept>
+#include <string>
+
+/** Store the library and kernel handles; neither handle is validated here. */
+AlgorithmLauncher::AlgorithmLauncher(CUlibrary l, CUkernel k) : library{l}, kernel{k} {}
+
+/**
+ * Enqueue the wrapped kernel on `stream` through the CUDA driver API.
+ *
+ * @param stream      stream the launch is submitted to
+ * @param grid        grid dimensions
+ * @param block       block dimensions
+ * @param shared_mem  dynamic shared memory per block, in bytes
+ * @param kernel_args array with one pointer per kernel parameter, as built by operator()
+ *
+ * @throws std::runtime_error if cuLaunchKernelEx reports an error. The call only submits the
+ *         launch; it does not synchronize the stream.
+ */
+void AlgorithmLauncher::call(
+  cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args)
+{
+  // Opt this launch into programmatic stream serialization.
+  CUlaunchAttribute attribute[1];
+  attribute[0].id                                           = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION;
+  attribute[0].value.programmaticStreamSerializationAllowed = 1;
+
+  CUlaunchConfig config{};
+  config.gridDimX       = grid.x;
+  config.gridDimY       = grid.y;
+  config.gridDimZ       = grid.z;
+  config.blockDimX      = block.x;
+  config.blockDimY      = block.y;
+  config.blockDimZ      = block.z;
+  config.sharedMemBytes = static_cast<unsigned int>(shared_mem);
+  config.hStream        = stream;
+  config.attrs          = attribute;
+  config.numAttrs       = 1;
+
+  // The launch status used to be discarded, which hid bad configurations and invalid kernels.
+  CUresult const status = cuLaunchKernelEx(&config, (CUfunction)kernel, kernel_args, nullptr);
+  if (status != CUDA_SUCCESS) {
+    char const* error_name = nullptr;
+    char const* error_text = nullptr;
+    cuGetErrorName(status, &error_name);
+    cuGetErrorString(status, &error_text);
+    throw std::runtime_error(std::string{"AlgorithmLauncher: cuLaunchKernelEx failed: "} +
+                             (error_name != nullptr ? error_name : "UNKNOWN") + " (" +
+                             (error_text != nullptr ? error_text : "no description") + ")");
+  }
+}
+
+/** Process-wide cache of built launchers, keyed by string; access is not synchronized. */
+std::unordered_map<std::string, AlgorithmLauncher>& get_cached_launchers()
+{
+  static std::unordered_map<std::string, AlgorithmLauncher> launchers;
+  return launchers;
+}
diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu
new file mode 100644
index 0000000000..6ab3a528c8
--- /dev/null
+++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "cuda.h" +#include "nvJitLink.h" + +namespace { +// We can make a better RAII wrapper around nvjitlinkhandle +void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) +{ + if (result != NVJITLINK_SUCCESS) { + std::cerr << "\n nvJITLink failed with error " << result << '\n'; + size_t log_size = 0; + result = nvJitLinkGetErrorLogSize(handle, &log_size); + if (result == NVJITLINK_SUCCESS && log_size > 0) { + std::unique_ptr log{new char[log_size]}; + result = nvJitLinkGetErrorLog(handle, log.get()); + if (result == NVJITLINK_SUCCESS) { + std::cerr << "AlgorithmPlanner nvJITLink error log: " << log.get() << '\n'; + } + } + exit(1); + } +} +} // namespace + +void AlgorithmPlanner::save_compute() +{ + std::cout << "Saving compute" << std::endl; + auto& db = fragment_database(); + std::cout << "DB size: " << db.cache.size() << std::endl; + std::cout << "Available keys in cache:" << std::endl; + for (const auto& pair : db.cache) { + std::cout << " " << pair.first << std::endl; + } + std::cout << "Finding key: " << this->name + "_" + this->params << std::endl; + auto val = db.cache.find(this->name + "_" + this->params); + if (val == db.cache.end()) { + std::cout << "Key not found" << std::endl; + return; + } + this->fragments.push_back(val->second.get()); + std::cout << "Fragment added with key: " << fragments.back()->compute_key << std::endl; + std::cout << "Fragments size: " << this->fragments.size() << std::endl; +} + +AlgorithmLauncher AlgorithmPlanner::get_launcher() +{ + std::cout << "Getting launcher" << std::endl; + auto& launchers = get_cached_launchers(); + auto key = this->name + "_" + this->params; + if (launchers.count(key) == 0) { + this->save_compute(); + launchers[key] = this->build(); + } + std::cout << "launcher key: " << key << std::endl; + return launchers[key]; + // this->save_compute(); + // return this->build(); +} + 
+/**
+ * Link every collected fragment with nvJitLink for the current device's architecture, load the
+ * resulting cubin as a CUDA library and wrap its first kernel in an AlgorithmLauncher.
+ *
+ * Any failing CUDA or nvJitLink call is reported on stderr and terminates the process, matching
+ * check_nvjitlink_result() in this file.
+ */
+AlgorithmLauncher AlgorithmPlanner::build()
+{
+  // Report a failed CUDA driver call and stop, instead of continuing with garbage handles.
+  auto check_cu = [](CUresult status, char const* what) {
+    if (status != CUDA_SUCCESS) {
+      char const* msg = nullptr;
+      cuGetErrorString(status, &msg);
+      std::cerr << "\n AlgorithmPlanner: " << what
+                << " failed: " << (msg != nullptr ? msg : "unknown CUDA driver error") << '\n';
+      exit(1);
+    }
+  };
+
+  // save_compute() returns without adding anything when the key is unknown; linking nothing
+  // cannot produce a kernel.
+  if (this->fragments.empty()) {
+    std::cerr << "\n AlgorithmPlanner: no fragment registered for key " << this->name << "_"
+              << this->params << '\n';
+    exit(1);
+  }
+
+  int device = 0;
+  int major  = 0;
+  int minor  = 0;
+  auto const device_status = cudaGetDevice(&device);
+  if (device_status != cudaSuccess) {
+    std::cerr << "\n AlgorithmPlanner: cudaGetDevice failed: " << cudaGetErrorString(device_status)
+              << '\n';
+    exit(1);
+  }
+  check_cu(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device),
+           "cuDeviceGetAttribute(COMPUTE_CAPABILITY_MAJOR)");
+  check_cu(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device),
+           "cuDeviceGetAttribute(COMPUTE_CAPABILITY_MINOR)");
+
+  std::string archs = "-arch=sm_" + std::to_string((major * 10 + minor));
+
+  // Load the generated LTO IR and link them together
+  nvJitLinkHandle handle = nullptr;  // defined value for the error-log query if creation fails
+  const char* lopts[]    = {"-lto", archs.c_str()};
+  auto result            = nvJitLinkCreate(&handle, 2, lopts);
+  check_nvjitlink_result(handle, result);
+
+  for (auto& frag : this->fragments) {
+    if (frag == nullptr) {
+      std::cerr << "\n AlgorithmPlanner: null fragment for key " << this->name << "_"
+                << this->params << '\n';
+      exit(1);
+    }
+    frag->add_to(handle);
+  }
+
+  // Call to nvJitLinkComplete causes linker to link together all the LTO-IR
+  // modules perform any optimizations and generate cubin from it.
+  std::cout << "\tStarted LTO runtime linking \n";
+  result = nvJitLinkComplete(handle);
+  check_nvjitlink_result(handle, result);
+  std::cout << "\tCompleted LTO runtime linking \n";
+
+  // get cubin from nvJitLink
+  size_t cubin_size = 0;
+  result            = nvJitLinkGetLinkedCubinSize(handle, &cubin_size);
+  check_nvjitlink_result(handle, result);
+
+  std::unique_ptr<char[]> cubin{new char[cubin_size]};
+  result = nvJitLinkGetLinkedCubin(handle, cubin.get());
+  check_nvjitlink_result(handle, result);
+
+  result = nvJitLinkDestroy(&handle);
+  check_nvjitlink_result(handle, result);
+
+  // cubin is linked, so now load it
+  CUlibrary library = nullptr;
+  check_cu(cuLibraryLoadData(&library, cubin.get(), nullptr, nullptr, 0, nullptr, nullptr, 0),
+           "cuLibraryLoadData");
+
+  // Still need to cache/compute the mangled name; until then the first enumerated kernel is used.
+  unsigned int kernel_count = 0;
+  check_cu(cuLibraryGetKernelCount(&kernel_count, library), "cuLibraryGetKernelCount");
+  if (kernel_count == 0) {
+    std::cerr << "\n AlgorithmPlanner: linked library contains no kernels for key " << this->name
+              << "_" << this->params << '\n';
+    exit(1);
+  }
+
+  CUkernel kernel = nullptr;
+  check_cu(cuLibraryEnumerateKernels(&kernel, 1, library), "cuLibraryEnumerateKernels");
+
+  return AlgorithmLauncher{library, kernel};
+}
diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu
new file mode 100644
index 0000000000..3c8fee591c
--- /dev/null
+++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu
@@ -0,0
+1,71 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE(review): the include targets were not legible in the reviewed text; they are reconstructed
+// from what this file uses -- confirm against the real source.
+#include <cuvs/detail/jit_lto/FragmentDatabase.h>
+
+#include <iostream>
+#include <memory>
+#include <string>
+
+FragmentDatabase::FragmentDatabase() {}
+
+/**
+ * Reserve a cache slot for the key `name + "_" + params`.
+ *
+ * @return true if the key was already present; false if an empty (null) slot was just inserted.
+ */
+bool FragmentDatabase::make_cache_entry(std::string const& name, std::string const& params)
+{
+  // try_emplace value-initializes the unique_ptr (null) only when the key is new.
+  return !this->cache.try_emplace(name + "_" + params).second;
+}
+
+/**
+ * Process-wide fragment registry.
+ *
+ * Construction of the function-local static is thread-safe (C++11), but the `cache` map itself is
+ * not synchronized; concurrent registration/lookup needs external locking.
+ */
+FragmentDatabase& fragment_database()
+{
+  static FragmentDatabase database;
+  return database;
+}
+
+/**
+ * Register an embedded fatbin blob under the key `algo + "_" + params`.
+ *
+ * The first non-null registration for a key wins; later calls with the same key are ignored.
+ * `blob` and `size` are forwarded to FatbinFragmentEntry, which keeps the pointer without copying.
+ */
+void registerFatbinFragment(std::string const& algo,
+                            std::string const& params,
+                            unsigned char const* blob,
+                            std::size_t size)
+{
+  auto& db       = fragment_database();
+  auto const key = algo + "_" + params;
+
+  // Single lookup: operator[] creates an empty slot when the key is new. Testing the pointer
+  // (rather than mere key presence) lets an empty placeholder left by make_cache_entry() be filled.
+  auto& slot = db.cache[key];
+  if (slot) { return; }
+
+  std::cout << "Caching fatbin fragment: " << key << std::endl;
+  slot = std::make_unique<FatbinFragmentEntry>(params, blob, size);
+}
diff --git a/cpp/src/detail/jit_lto/FragmentEntry.cu b/cpp/src/detail/jit_lto/FragmentEntry.cu
new file mode 100644
index 0000000000..b76df1bd29
--- /dev/null
+++ b/cpp/src/detail/jit_lto/FragmentEntry.cu
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +namespace { +// std::string make_compute_key(std::vector const& params) { +// std::string k{}; +// for (auto& p : params) { +// k += p + "_"; +// } +// return k; +// } + +// We can make a better RAII wrapper around nvjitlinkhandle +void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) +{ + if (result != NVJITLINK_SUCCESS) { + std::cerr << "\n nvJITLink failed with error " << result << '\n'; + size_t log_size = 0; + result = nvJitLinkGetErrorLogSize(handle, &log_size); + if (result == NVJITLINK_SUCCESS && log_size > 0) { + std::unique_ptr log{new char[log_size]}; + result = nvJitLinkGetErrorLog(handle, log.get()); + if (result == NVJITLINK_SUCCESS) { + std::cerr << "FragmentEntry nvJITLink error log: " << log.get() << '\n'; + } + } + exit(1); + } +} +} // namespace + +FragmentEntry::FragmentEntry(std::string const& params) : compute_key(params) {} + +// std::size_t FragmentEntryHash::operator()( +// std::vector const& params) const noexcept { +// return std::hash{}(make_compute_key(params)); +// } + +// bool FragmentEntryEqual::operator()( +// std::vector const& params, +// std::unique_ptr const& entry) const noexcept { +// if (params.size() == entry->compute_arg_count) { +// auto key = make_compute_key(params); +// return entry->compute_key == key; +// } +// return false; +// } + +FatbinFragmentEntry::FatbinFragmentEntry(std::string const& params, + unsigned char const* view, + std::size_t size) + : FragmentEntry(params), data_size(size), data_view(view) +{ +} + +bool 
FatbinFragmentEntry::add_to(nvJitLinkHandle& handle) const +{ + auto result = nvJitLinkAddData( + handle, NVJITLINK_INPUT_ANY, this->data_view, this->data_size, this->compute_key.c_str()); + + // Loading from file works + // So the issue is in our data_view / data_size + // auto result = nvJitLinkAddFile( + // handle, NVJITLINK_INPUT_ANY, + // "/home/rmaynard/Work/runtime_lto_examples/build/algorithms/CMakeFiles/" + // "algo_kernels.dir/kernels/sum_int32.fatbin"); + check_nvjitlink_result(handle, result); + return true; +} diff --git a/cpp/src/detail/jit_lto/MakeFragmentKey.cu b/cpp/src/detail/jit_lto/MakeFragmentKey.cu new file mode 100644 index 0000000000..020010bf21 --- /dev/null +++ b/cpp/src/detail/jit_lto/MakeFragmentKey.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define NVRTC_GET_TYPE_NAME 1 +// #include + +#include + +namespace detail { +// std::string nvrtc_name(std::type_info const& info) { +// std::string type_name; +// nvrtcGetTypeName(info, &type_name); +// return type_name; +// } +} // namespace detail diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index e3286ba548..8bf0d393a7 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -18,6 +18,7 @@ #include "../ivf_common.cuh" #include "../sample_filter.cuh" +#include "jit_lto_kernels/interleaved_scan_planner.hpp" #include #include @@ -972,8 +973,8 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) /** * Configure the gridDim.x to maximize GPU occupancy, but reduce the output size */ -template -uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMemSize, T func) +// template +uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMemSize, CUkernel func) { int dev_id; RAFT_CUDA_TRY(cudaGetDevice(&dev_id)); @@ -1017,16 +1018,20 @@ void launch_kernel(Lambda lambda, { RAFT_EXPECTS(Veclen == index.veclen(), "Configured Veclen does not match the index interleaving pattern."); - constexpr auto kKernel = interleaved_scan_kernel; + // constexpr auto kKernel = interleaved_scan_kernel; + auto kernel_planner = InterleavedScanPlanner( + Capacity, Veclen, Ascending, ComputeNorm); + auto kernel_launcher = kernel_planner.get_launcher(); + const int max_query_smem = 16384; int query_smem_elems = std::min(max_query_smem / sizeof(T), raft::Pow2::roundUp(index.dim())); @@ -1044,7 +1049,8 @@ void launch_kernel(Lambda lambda, constexpr uint32_t kMaxGridY = 32768; if (grid_dim_x == 0) { - grid_dim_x = configure_launch_x(std::min(kMaxGridY, num_queries), n_probes, smem_size, kKernel); + grid_dim_x = configure_launch_x( + std::min(kMaxGridY, num_queries), n_probes, smem_size, 
kernel_launcher.get_kernel()); return; } @@ -1060,22 +1066,42 @@ void launch_kernel(Lambda lambda, block_dim.x, n_probes, smem_size); - kKernel<<>>(lambda, - post_process, - query_smem_elems, - queries, - coarse_index, - index.data_ptrs().data_handle(), - index.list_sizes().data_handle(), - queries_offset + query_offset, - n_probes, - k, - max_samples, - chunk_indices, - index.dim(), - sample_filter, - neighbors, - distances); + // kKernel<<>>(lambda, + // post_process, + // query_smem_elems, + // queries, + // coarse_index, + // index.data_ptrs().data_handle(), + // index.list_sizes().data_handle(), + // queries_offset + query_offset, + // n_probes, + // k, + // max_samples, + // chunk_indices, + // index.dim(), + // sample_filter, + // neighbors, + // distances); + kernel_launcher(stream, + grid_dim, + block_dim, + smem_size, + lambda, + post_process, + query_smem_elems, + queries, + coarse_index, + index.data_ptrs().data_handle(), + index.list_sizes().data_handle(), + queries_offset + query_offset, + n_probes, + k, + max_samples, + chunk_indices, + index.dim(), + sample_filter, + neighbors, + distances); queries += grid_dim_y * index.dim(); if constexpr (Capacity > 0) { neighbors += grid_dim_y * grid_dim_x * k; diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md new file mode 100644 index 0000000000..117320693c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md @@ -0,0 +1,87 @@ +# Interleaved Scan Kernel Generation + +This directory contains the tools and generated files for creating CUDA kernel instantiations for the interleaved scan functionality. 
+ +## Files + +- `interleaved_scan_kernels.txt` - List of kernel function signatures (1280 entries) +- `generate_kernels.py` - Python script to generate .cu files from the kernel list +- `interleaved_scan_kernel_*.cu` - Generated CUDA kernel files (1280 files) +- `generated_kernels.cmake` - CMake file with relative paths +- `CMakeLists_kernels.cmake` - CMake file with absolute paths + +## Usage + +### Regenerating Kernel Files + +To regenerate all kernel files: + +```bash +cd /path/to/cuvs/cpp/src/neighbors/ivf_flat/jit_lto_kernels +python3 generate_kernels.py +``` + +This will: +1. Parse `interleaved_scan_kernels.txt` +2. Generate 1280 `.cu` files +3. Create/update CMake files + +### Using in CMake + +Include the generated CMake file in your main CMakeLists.txt: + +```cmake +# Option 1: Relative paths +include(${CMAKE_CURRENT_SOURCE_DIR}/jit_lto_kernels/generated_kernels.cmake) + +# Option 2: Absolute paths +include(${CMAKE_CURRENT_SOURCE_DIR}/jit_lto_kernels/CMakeLists_kernels.cmake) + +# Use the variable +add_library(my_target ${INTERLEAVED_SCAN_KERNEL_FILES}) +``` + +## Template Parameters + +Each kernel is parameterized by 10 template arguments: + +1. **kBlockSize** (0, 1, 2, 4, 8, 16, 32, 64, 128, 256) +2. **VecLen** (1, 4, 8, 16) +3. **kManageLocalTopK** (true, false) +4. **kPrecompBaseDiff** (true, false) +5. **T** (float, __half, unsigned char, signed char) +6. **AccT** (float, __half, unsigned int, int) +7. **IdxT** (long) +8. **FilterT** (none_sample_filter → 'n', bitset_filter → 'b') +9. **DistanceT** (inner_prod_dist → 'inner_N', euclidean_dist → 'euclidean_N') +10. 
**FinalLambda** (identity_op → 'id', sqrt_op → 'sqrt', compose_op → 'compose') + +## Filename Convention + +Files follow the pattern: +``` +interleaved_scan_kernel_<kBlockSize>_<VecLen>_<kManageLocalTopK>_<kPrecompBaseDiff>_<T>_<AccT>_<IdxT>_<FilterT>_<DistanceT>_<FinalLambda>.cu +``` + +Example: +``` +Template: <0, 1, false, false, float, float, long, none_sample_filter, inner_prod_dist<1>, identity_op> +Filename: interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu +``` + +## File Structure + +Each generated `.cu` file contains: + +1. **Apache 2.0 License Header** +2. **Include**: `#include "../ivf_flat_interleaved_scan.cuh"` +3. **Conditional compilation**: + - `#ifdef BUILD_KERNEL`: Template instantiation + - `#else`: Registration function for JIT/LTO system + +## Notes + +- All `.cu` files are written to the current working directory, so run the script from this directory (as shown in Usage) +- The script automatically creates CMake files with all generated filenames +- Progress is printed every 100 files during generation +- Files are sorted alphabetically in the CMake lists diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py new file mode 100644 index 0000000000..c6702e0f18 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py @@ -0,0 +1,272 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + + +#!/usr/bin/env python3 +""" +Simplified script to generate CUDA kernel files for interleaved_scan_kernel instantiations. +Reads from interleaved_scan_kernels.txt and generates individual .cu files. +""" + +import re +import os +from pathlib import Path + + +def parse_template_parameters(template_str): + """Parse template parameters from a template string with nested templates.""" + params = [] + current_param = '' + depth = 0 + + for char in template_str: + if char == '<': + depth += 1 + elif char == '>': + depth -= 1 + elif char == ',' and depth == 0: + params.append(current_param.strip()) + current_param = '' + continue + current_param += char + + if current_param: + params.append(current_param.strip()) + + return params + + +def get_type_abbreviation(type_str): + """Get abbreviation for type names.""" + type_map = { + 'float': 'f', + '__half': 'h', + 'unsigned char': 'uc', + 'signed char': 'sc', + 'unsigned int': 'ui', + 'int': 'i', + 'long': 'l' + } + return type_map.get(type_str, type_str) + + +def get_filter_abbreviation(filter_str): + """Get abbreviation for filter types.""" + if 'none_sample_filter' in filter_str: + return 'n' + elif 'bitset_filter' in filter_str: + return 'b' + return 'unknown' + + +def get_distance_abbreviation(dist_str): + """Get abbreviation for distance metric types.""" + if 'inner_prod_dist' in dist_str: + match = re.search(r'inner_prod_dist<(\d+),', dist_str) + if match: + return f'inner_{match.group(1)}' + elif 'euclidean_dist' in dist_str: + match = re.search(r'euclidean_dist<(\d+),', dist_str) + if match: + return f'euclidean_{match.group(1)}' + return 'unknown' + + +def get_final_op_abbreviation(op_str): + """Get abbreviation for final operator types.""" + if 'identity_op' in op_str: + return 'id' + elif 'sqrt_op' in op_str: + return 'sqrt' + elif 'compose_op' in op_str: + return 'compose' + return 'unknown' + + +def generate_filename(params): + 
"""Generate filename from template parameters.""" + # params[0]: kBlockSize (numeric) + # params[1]: VecLen (numeric) + # params[2]: kManageLocalTopK (bool) + # params[3]: kPrecompBaseDiff (bool) + # params[4]: T (type) + # params[5]: AccT (type) + # params[6]: IdxT (type) + # params[7]: FilterT (filter type) + # params[8]: DistanceT (distance metric) + # params[9]: FinalLambda (final operator) + + parts = [ + params[0], # kBlockSize + params[1], # VecLen + params[2], # kManageLocalTopK + params[3], # kPrecompBaseDiff + get_type_abbreviation(params[4]), # T + get_type_abbreviation(params[5]), # AccT + get_type_abbreviation(params[6]), # IdxT + get_filter_abbreviation(params[7]), # FilterT + get_distance_abbreviation(params[8]), # DistanceT + get_final_op_abbreviation(params[9]) # FinalLambda + ] + + return f"interleaved_scan_kernel_{'_'.join(parts)}.cu" + + +def generate_register_function_name(params): + """Generate the registration function name from template parameters.""" + parts = [ + params[0], # kBlockSize + params[1], # VecLen + params[2], # kManageLocalTopK + params[3], # kPrecompBaseDiff + get_type_abbreviation(params[4]), # T + get_type_abbreviation(params[5]), # AccT + get_type_abbreviation(params[6]), # IdxT + get_filter_abbreviation(params[7]), # FilterT + get_distance_abbreviation(params[8]), # DistanceT + get_final_op_abbreviation(params[9]) # FinalLambda + ] + + return f"interleaved_scan_kernel_{'_'.join(parts)}" + + +def generate_cuda_file_content(params): + """Generate the content of a CUDA kernel file.""" + filename = generate_register_function_name(params) + embedded_var_name = f"embedded_{filename}" + + # Format template parameters for the template instantiation (all 10 params) + template_params = ', '.join(params) + + # Format template parameters for registerAlgorithm (params 4-9, excluding first four) + register_template_params = ', '.join(params[4:]) + + # Create the string parameter with first four params (Capacity, Veclen, Ascending, 
ComputeNorm) + string_param = f"interleaved_scan_kernel_{params[0]}_{params[1]}_{params[2]}_{params[3]}" + + content = f"""/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<{template_params}>({params[8]}, {params[9]}, unsigned int, {params[4]} const*, unsigned int const*, {params[4]} const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, {params[7]}, unsigned int*, float*); + +#else + +#include "{filename}.h" +#include + +__attribute__((__constructor__)) static void register_{filename}() +{{ +registerAlgorithm< + {register_template_params}>("{string_param}", + {embedded_var_name}, + sizeof({embedded_var_name})); +}} + +#endif +""" + + return content + + +def main(): + # Read the kernels file (now in the same directory) + kernels_file = Path('interleaved_scan_kernels.txt') + if not kernels_file.exists(): + print(f"Error: {kernels_file} not found!") + return + + with open(kernels_file, 'r') as f: + lines = f.readlines() + + # Output directory (current directory) + output_dir = Path('.') + + # Parse all kernels and generate files + generated_files = [] + + for line_num, line in enumerate(lines, 1): + line = line.strip() + if not line: + continue + + # Extract the full template from the 
function signature + start = line.find('interleaved_scan_kernel<') + if start == -1: + continue + + start += len('interleaved_scan_kernel<') + depth = 1 + end = start + + while depth > 0 and end < len(line): + if line[end] == '<': + depth += 1 + elif line[end] == '>': + depth -= 1 + end += 1 + + template_str = line[start:end-1] + params = parse_template_parameters(template_str) + + if len(params) != 10: + print(f"Warning: Line {line_num} has {len(params)} parameters, expected 10") + continue + + # Generate filename and content + filename = generate_filename(params) + file_content = generate_cuda_file_content(params) + + # Write file + output_file = output_dir / filename + with open(output_file, 'w') as f: + f.write(file_content) + + generated_files.append(filename) + + if line_num % 100 == 0: + print(f"Generated {line_num} files...") + + print(f"\nGenerated {len(generated_files)} CUDA kernel files") + + # Generate CMake file with all filenames + cmake_file = Path('../../../../cmake/jit_lto_kernels_list') / 'interleaved_scan.cmake' + with open(cmake_file, 'w') as f: + f.write("# Auto-generated list of interleaved scan kernel files\n") + f.write("# Generated by generate_kernels.py\n\n") + f.write("set(INTERLEAVED_SCAN_KERNEL_FILES\n") + for filename in sorted(generated_files): + f.write(f" src/neighbors/ivf_flat/jit_lto_kernels/{filename}\n") + f.write(")\n") + + print(f"Generated CMake file: {cmake_file}") + +if __name__ == '__main__': + main() diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..d00bc40450 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..f0e2e2f906 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..0179171be9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..59d418bc01 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..4775a5f361 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu new file 
mode 100644 index 0000000000..0c28b4164f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_false_false", + 
embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..943fd3247a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..8df6900f3d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..f9a7e499c6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..3caf608abe --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..959080398a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..de88e310b0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_false_false", + embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..46c0cb5193 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_16_false_true", + embedded_interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..35b2de35d1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_16_false_true", + embedded_interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..67c9d2010c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_16_false_true", + embedded_interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..c1630265e6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 
@@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_16_false_true", + 
embedded_interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..65ff443459 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..290f5889bb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..278b9d8fc6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..c66eda1c33 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new 
file mode 100644 index 0000000000..bdda3c39bf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_16_true_false", + 
embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..211bb65cd7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..d72f32931b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..ce7009d993 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..af46ae904e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..c0914e8406 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..f2c57778cd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..1c5d592e0d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_16_true_false", + embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..970656ed25 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_16_true_true", + embedded_interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..6c90ed5644 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_16_true_true", + embedded_interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..d11dd45984 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_16_true_true", + 
embedded_interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..63c5c08309 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_16_true_true", + embedded_interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..da4f8038d5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..33e8352483 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..580fb44149 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..edc330b61a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..73ef0a45d0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..34327a4358 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..f0b9914492 --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..a9bd061376 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..491bc49ed7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..fb9f4df57e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..4f089adb87 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..8ed094b4eb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..778b3e740f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..dea20518c0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..1acd28fd47 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..4c5f70f24d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 
100644 index 0000000000..5a18844a55 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_false_false", + 
embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..154955c2d0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..488ef48da4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..692911cbbc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..e305ae7102 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..7e66cee018 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e544ac9ade --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..6c274ae690 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_false_false", + embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..ed6bec67d3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_false_true", + embedded_interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..17295b7515 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_false_true", + embedded_interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..1bb3691c13 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_false_true", + embedded_interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..d2e16b5adf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_false_true", + embedded_interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..81a880b7b2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_false_true", + embedded_interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..48ab435ceb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_false_true", + embedded_interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..6d32258ebc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_false_true", + 
embedded_interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..82f015e63b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_false_true", + embedded_interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..cb10fc094a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, 
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..d49001e12a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..ce9973a95c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..b92dc63a75 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..5e901504a6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..c0e3e48723 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..3ff28fd79a --- /dev/null +++ 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..5c20937d09 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..8eab1fa199 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..e0b6c12be8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..4d1a11888b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..a0d86086b2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..907a9f521f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..67c6d47c03 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..92d1f60583 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm< + signed char, + int, + long, + 
cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>("interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..b388686f90 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..b1c0ff0ac1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 
0000000000..f8ebb0e5d3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..fa85e1fe3f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..aa3c9475c1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..cf0d8d0f37 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..6190621778 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id, + 
sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..0e7de664ec --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..795b9cd608 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_0_1_true_false", + embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..476e0135c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_true_true", + embedded_interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..bd58d97e16 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_true_true", + embedded_interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..aae27d0152 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_true_true", + embedded_interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..50e0352998 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_true_true", + embedded_interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..a91c046f41 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_true_true", + embedded_interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..c95b0c10af --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_true_true", + embedded_interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..4b8fcf4c02 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_true_true", + 
embedded_interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..ae2d745de5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_1_true_true", + embedded_interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..2b2b96ace9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_0_4_false_false", + embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..83d8af9ca3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_4_false_false", + embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..f2d0e98389 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_0_4_false_false", + embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..cc37fff00b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_0_4_false_false", + embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..c6db4f78b9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_4_false_false", + embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..422b31652c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_0_4_false_false", + embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..f01a4fa5a6 --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_4_false_true", + 
embedded_interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..0f662c6948 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_4_false_true", + embedded_interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..a2676facf3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_0_4_true_false", + embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
new file mode 100644 index 0000000000..7962f45919 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_4_true_false", + 
embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..05d05e32bd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_0_4_true_false", + embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..aa9f0e22e0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_0_4_true_false", + embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..da8a232009 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + 
"interleaved_scan_kernel_0_4_true_false", + embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..0ffc4f4696 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_0_4_true_false", + embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..461723b693 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_4_true_true", + embedded_interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..c1464eb2da --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, 
float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_4_true_true", + embedded_interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..f54e2f537e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_8_false_false", + embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..a956572f36 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_8_false_false", + embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..cebf4d8171 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_0_8_false_false", + embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..92c12d03af --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_8_false_false", + embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..6bd137d57f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_8_false_false", + embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..09fa2e4b0a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_8_false_false", + embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..3e064b22a7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_8_false_true", + embedded_interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..66fec75011 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_8_false_true", + embedded_interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..70f49c517c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_8_true_false", + embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id, + 
sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..76e2117710 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_8_true_false", + embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..f2e28dbb1c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_0_8_true_false", + embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..2f191f1acd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_8_true_false", + embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..dd8cca7f9d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_0_8_true_false", + embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..ba222c208e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_0_8_true_false", + embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..ff4c740d2a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_8_true_true", + embedded_interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..2c52558937 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 0, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_0_8_true_true", + embedded_interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose)); +} 
+ +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..87f3ad2eec --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..e027047b07 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..95ae978726 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..c2b8bf3ff0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..34395eef54 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..9511e59037 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..e59ab34eb3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< 
+ unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..835c111636 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..c1fa3db4bc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright 
(c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..01f4cbb69d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..a5f815a11c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..a380f12584 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_false_false", + embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..2ef7a760e1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_16_false_true", + embedded_interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff 
--git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..6983e19c2c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_16_false_true", + embedded_interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..10f6f98154 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_16_false_true", + 
embedded_interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..b8556d8387 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_16_false_true", + embedded_interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..72e9167bf6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 
+1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..4f0b292597 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..57a3df97e0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..68f2aa96a7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..af086e5511 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu new 
file mode 100644 index 0000000000..2ef3c98ff8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_true_false", + 
embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..1f6d0aac12 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..2172ef974d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_16_true_false", + 
embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..baac53ad25 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..852f0ffd67 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..6916ae7446 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..cb5d1422b4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_16_true_false", + embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..5402e8f0c7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_16_true_true", + embedded_interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..73ad7f4081 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_16_true_true", + embedded_interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..df0231ddbb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_16_true_true", + embedded_interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..e22fa0925d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ 
-0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + 
"interleaved_scan_kernel_128_16_true_true", + embedded_interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..b37f8190ca --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..24859b224a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..97dc103c39 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..1578667fb1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..ee000933d6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 
0000000000..e2941f7e3f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..3295168c4a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..79d3caad57 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..6e2f602a12 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..a72f773b20 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..d864dc19ac --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..3f41496c69 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..d44c107782 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..bb35a3534c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..fbba323559 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..ee06503981 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e411695221 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 
100644 index 0000000000..c7a4d2092d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + 
embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..cfb94b212f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..c7dbe0ca1f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..78772dbf04 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..7517105d71 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id, + 
sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..7cbecb2a4e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..83bbb93cf8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_false_false", + embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..4a3e900608 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_false_true", + embedded_interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..7793be4371 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_false_true", + embedded_interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..48bb13f1cb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_false_true", + embedded_interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..b27d5fb080 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_false_true", + embedded_interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose, + 
sizeof(embedded_interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..a23ff75f32 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_false_true", + embedded_interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..d094565850 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_false_true", + embedded_interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..f41f4fcc1c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_false_true", + embedded_interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..05dbb5ef68 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 
@@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_false_true", + 
embedded_interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..1a52df7dc0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..278dbeb71d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu 
new file mode 100644 index 0000000000..3b7fb0f9fe --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + 
embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..bf9c5077a5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..fe1926a544 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..d9d6a1f65e --- 
/dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..653ced6fdb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..bf6a8a1c26 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..f751e803a6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..8ed4dce780 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..d014f48b27 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..38b815194e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..650e48e9ac --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..f3d53e83b1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..4d35af67c1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..de1fcb6a36 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed 
char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..6340c2511e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..febd9c7363 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 
index 0000000000..b4197e7fae --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..34fbc7f821 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..a9d949c1ad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..7e6962b055 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..d95aff51fd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..86df80ac87 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_128_1_true_false", + embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..42d31e7ed0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_true_true", + embedded_interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..d19bbcac33 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_true_true", + embedded_interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..18e99098ef --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_true_true", + embedded_interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..e597e0a0aa --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_true_true", + embedded_interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..c0f8acb75c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_true_true", + embedded_interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..9e798b0dfa --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_true_true", + embedded_interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..9973ca8acc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_true_true", + 
embedded_interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..4a073ed5f5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_1_true_true", + embedded_interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..e57312873e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright 
(c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_4_false_false", + embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..36df23bd98 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_4_false_false", + embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..ff3c668629 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_4_false_false", + embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..09f044e481 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + 
raft::identity_op>( + "interleaved_scan_kernel_128_4_false_false", + embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..bb15eedb97 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_4_false_false", + embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..7f5a871914 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_4_false_false", + embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 
0000000000..b0f03304f3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_4_false_true", + 
embedded_interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..543cb0f8f3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_4_false_true", + embedded_interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..2016405c84 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_4_true_false", + embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..73e5ca4f0d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_4_true_false", + embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..c624c73c3a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_4_true_false", + embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..bf76e1ac70 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_4_true_false", + embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 
0000000000..45a5acd43f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_4_true_false", + embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..2c9ea7b93e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_128_4_true_false", + embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..7d2b9bfe88 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_4_true_true", + embedded_interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..a035a55018 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_4_true_true", + embedded_interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..8eab95f347 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_8_false_false", + embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..9fb210445a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_8_false_false", + embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..c91008868a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_8_false_false", + embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..2281cf6e4f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_8_false_false", + embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..427175bd6d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_8_false_false", + embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..337728e6f5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_8_false_false", + embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..5b708524ed --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, 
+ cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_8_false_true", + embedded_interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..1b5847c76f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_8_false_true", + embedded_interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..8a8a7e9324 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_8_true_false", + embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id, + 
sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..d28209a103 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_8_true_false", + embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..8ef55b7351 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_8_true_false", + embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id, + 
sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..03da5806be --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_8_true_false", + embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..2ed48617ce --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_128_8_true_false", + embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..d8ac6bc9da --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_128_8_true_false", + embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..2a58caa110 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_8_true_true", + embedded_interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..c069a00047 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 128, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_128_8_true_true", + embedded_interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..92eefc2039 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..1413cdafa4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..abb27de5ca --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..fa37cb8b61 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..d0e53d186b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu new 
file mode 100644 index 0000000000..64d73b4495 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_false_false", + 
embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..4dc43726a8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..604d6139fd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_16_false_false", + 
embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..c42a8d7272 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..db6b777b6c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..5f395a56b7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..253acbffad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_false_false", + embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..d9e0708b33 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_16_false_true", + embedded_interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..6f0260aa00 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_16_false_true", + embedded_interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..191099a256 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_16_false_true", + embedded_interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..16245b7e86 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ 
-0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + 
"interleaved_scan_kernel_16_16_false_true", + embedded_interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..653cf15163 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..cdbd9ea8bb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..3b1bb91633 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..dafacc91f0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..021e33b021 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..5daf8f72fc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file 
mode 100644 index 0000000000..3e50b13e2e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..bc97285469 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..f057ad2263 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..0aaa596db2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..1f4dbb563f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..552d5a50a5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_16_true_false", + embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..758857e205 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_16_true_true", + embedded_interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..9afa17102e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_16_true_true", + embedded_interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..342d405807 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_16_true_true", + 
embedded_interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..60441c53af --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_16_true_true", + embedded_interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..acf382fc91 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * 
Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..5f83c251e3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..d5f74b39a2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu 
new file mode 100644 index 0000000000..9bbc77bd1e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + 
embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..a77770d297 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..f00c25a49c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..22b026d4ab --- 
/dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..a36fcc927b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..55cab64d9c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..d1679f93fc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..800628e98e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..9371730a2f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..10a70a4f71 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..31580d2a4c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..a7ba0c42a8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..b793c66b94 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new 
file mode 100644 index 0000000000..fcae8b176a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_false_false", + 
embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..2603801a52 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..e66b965708 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..bbab35578d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..5b09b40e14 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..de74044313 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..7ad19ce463 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..3442337c80 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_false_false", + embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..b18813def3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_false_true", + embedded_interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..8faaa15429 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_false_true", + embedded_interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..22e6bab8f6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_false_true", + embedded_interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..108a8f7b7d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_false_true", + embedded_interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..163cd3dfa2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_false_true", + embedded_interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..e9f6150014 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_false_true", + embedded_interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..be90918baf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_false_true", + 
embedded_interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..5c20a24283 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_false_true", + embedded_interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..854f544e4b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..583fbe3341 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..b0665ac932 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..7960cd58e5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..71d11af7ad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..4a7ddb65e4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..493f330640 --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..6d204dcace --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..d405cf90c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..a4c76a451f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..9b97001867 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..d5b2f36a46 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..4abed39251 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..ab06d74e29 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..bdcbeec0cb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..c5ab6751b3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 
100644 index 0000000000..a9b3668479 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_true_false", + 
embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..e17836193d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..3f6656a74c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..981d046ce8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..5d82b1de54 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..4def8a05cb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..073e517b5e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..aab4de1489 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_16_1_true_false", + embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..3f8dfe9c9a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_true_true", + embedded_interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..bece6b1112 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_true_true", + embedded_interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..7cb5d6fab6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_true_true", + embedded_interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..d5a6d4144e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_true_true", + embedded_interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..ece693de83 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_true_true", + embedded_interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..353dacd3c5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_true_true", + embedded_interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..675a02825e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_true_true", + 
embedded_interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..2697ef00f9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_1_true_true", + embedded_interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..250e0b0c9e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, 
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_4_false_false", + embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..9e148b175b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_4_false_false", + embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..90a90ea7ad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_4_false_false", + embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu 
new file mode 100644 index 0000000000..68009aae5e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_4_false_false", + 
embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..18aaba6e87 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_4_false_false", + embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..dee3e092ee --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_4_false_false", + embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..63c633165b --- 
/dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_4_false_true", + 
embedded_interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..921cdcafaf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_4_false_true", + embedded_interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..d7c04730dc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_4_true_false", + embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..cfe442710f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_4_true_false", + embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..2932e3dec6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_16_4_true_false", + embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..0bca2a8d06 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_4_true_false", + embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..b05dc7a4e0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + 
"interleaved_scan_kernel_16_4_true_false", + embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..616cc70c6f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_16_4_true_false", + embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..96465efddc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_4_true_true", + embedded_interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..e88abbb91c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_4_true_true", + embedded_interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..bbab804bd8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_8_false_false", + embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..108772575d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_8_false_false", + embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..1d7367270e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_8_false_false", + embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..0a93971cad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_8_false_false", + embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..180d653306 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_8_false_false", + embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..26a619cf6e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_8_false_false", + embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..a46ca7cdaa --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_8_false_true", + embedded_interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..c8c8c5a3c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_8_false_true", + embedded_interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..470fb87186 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_8_true_false", + embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id, + 
sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..32456a1b61 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_8_true_false", + embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..b97bc1b909 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_16_8_true_false", + embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..8348b1a06a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_8_true_false", + embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..8847ca90ad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_16_8_true_false", + embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..c3cc978542 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_16_8_true_false", + embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..53e83996a1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_8_true_true", + embedded_interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..f1549bc17d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 16, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_16_8_true_true", + embedded_interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..d313b313a7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..bfe6bdfcc7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..6ec34f5a9e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..7442e84634 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..99a090ed4b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu new file 
mode 100644 index 0000000000..93bc8ec319 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_false_false", + 
embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..9688313a02 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..195350cd5d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..c36ee524b0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..120fc361fe --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..85e62e4717 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..8bfac9f336 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_false_false", + embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..a2d5e7748d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_16_false_true", + embedded_interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..d3ca2cb6a2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_16_false_true", + embedded_interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..59cb6e0d81 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_16_false_true", + embedded_interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..82b146a4ef --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 
@@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_16_false_true", + 
embedded_interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..b34266cd12 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..4554e91ede --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..b064db1bbc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..1b94324b95 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new 
file mode 100644 index 0000000000..60bf7eb50a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_16_true_false", + 
embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..0e8416441e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..62077201fc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..e4f1693464 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..40a662bb23 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..2bd5f5fed9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..67c5ef3cd8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..d16449cf36 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_16_true_false", + embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..cfc27da409 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_16_true_true", + embedded_interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..0067a25bfa --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_16_true_true", + embedded_interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..3e5cf49c48 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_16_true_true", + 
embedded_interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..f79920d34d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_16_true_true", + embedded_interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..976a7f0e1c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e87f292422 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..adc59f0f6c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..7c24d7dd56 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..c41d2b365b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..e1b95d5bb2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..82ff7537b5 --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..5c64bdefba --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..3b9f319648 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..4469a2e956 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..6681a34e72 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..bff74da591 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..5296798d1f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..6988150da8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..b68a1bf0b3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..fabc4942b6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 
100644 index 0000000000..c64303a78d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_false_false", + 
embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..6786bc3e83 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..133112f435 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..7fa523d13b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..e6b0975d90 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..1f2c1e55d8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..4395e4a561 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..0161055330 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_false_false", + embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..2a57362c41 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_false_true", + embedded_interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..1d71639470 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_false_true", + embedded_interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..cc3f2abbb8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_false_true", + embedded_interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..db73777fdf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_false_true", + embedded_interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..12acd5f1ed --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_false_true", + embedded_interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..3bc8461ffc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_false_true", + embedded_interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..dd5be50218 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_false_true", + 
embedded_interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..8967bb9f7e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_false_true", + embedded_interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..687a786004 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, 
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..4284a60429 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..ece0076470 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..d0d19a9db4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..609f7d177d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..0decac2999 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..98b24a0a68 --- /dev/null +++ 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..8130c8ea70 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..bd68489509 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..05f09d8310 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..7ffb1dbd6b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..ce25bf8bb7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..da687ef63c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..d808a82d52 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL defined: this branch holds the explicit instantiation of interleaved_scan_kernel */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* BUILD_KERNEL undefined: this branch defines a constructor-attribute function that passes the embedded_* symbol and its sizeof to registerAlgorithm; presumably that symbol is the prebuilt kernel image - TODO confirm */ + +#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..370b5db447 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm< + signed char, + int, + long, + 
cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>("interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..47081c5141 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..11dfbde90c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 
0000000000..285780ba31 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..895c216a37 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..6c435e5c7a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..91f99813b1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..07fb6fa949 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id, + 
sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..ff21d82fe2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..deadb211bf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_1_1_true_false", + embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..e3ebdb71c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_true_true", + embedded_interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..10299f9d02 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_true_true", + embedded_interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..c35d53476f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_true_true", + embedded_interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..ca3dd1b462 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_true_true", + embedded_interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..acccf76a18 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_true_true", + embedded_interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..604450177c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_true_true", + embedded_interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..ae17907237 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_true_true", + 
embedded_interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..67ebe7b2d6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_1_true_true", + embedded_interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..0546f1a99d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_1_4_false_false", + embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..7ae054b18c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_4_false_false", + embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..fa40b13fee --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_1_4_false_false", + embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..a833d09554 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_1_4_false_false", + embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..fb8152448e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_4_false_false", + embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..ac580f6066 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_1_4_false_false", + embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..32ebdce235 --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_4_false_true", + 
embedded_interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..8d4c94c2d2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_4_false_true", + embedded_interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..c671d9c54a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_1_4_true_false", + embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
new file mode 100644 index 0000000000..14130e2c68 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_4_true_false", + 
embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..7823c9712d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_1_4_true_false", + embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..8cd1de90c7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_1_4_true_false", + embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..0c962ce69c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + 
"interleaved_scan_kernel_1_4_true_false", + embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..864e1d21d7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_1_4_true_false", + embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..6a938e9254 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_4_true_true", + embedded_interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..92e9c626ad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, 
float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_4_true_true", + embedded_interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..399b4615a6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_8_false_false", + embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..23269a4bf5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_8_false_false", + embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..f52f11e39d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_1_8_false_false", + embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..e071932889 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_8_false_false", + embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..7b8d7cc5a9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_8_false_false", + embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..2c20f0c62b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_8_false_false", + embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..1c2437b39e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_8_false_true", + embedded_interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..1c0f18d060 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_8_false_true", + embedded_interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..08022a0fb5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_8_true_false", + embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id, + 
sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..edd92daa39 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_8_true_false", + embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..4b731550c9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_1_8_true_false", + embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..77d80a328f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_8_true_false", + embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..a62a8e96f0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_1_8_true_false", + embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..351209b2e1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_1_8_true_false", + embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..beef9b5aad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_8_true_true", + embedded_interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..2a63e36aec --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 1, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_1_8_true_true", + embedded_interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose)); +} 
+ +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..2333d6b8e0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..920f2a7ec8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..a9883674fc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..7597579be0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..897cf3b156 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..5aed2efb0c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..bcb3380592 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< 
+ unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..2aca305bde --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..a21e7615bf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright 
(c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..e8785f8db9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..890fed00c3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..887593c2e7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_false_false", + embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..fb6f191657 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_16_false_true", + embedded_interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff 
--git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..69824c409f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_16_false_true", + embedded_interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..d444814b43 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_16_false_true", + 
embedded_interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..65bcf4285e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_16_false_true", + embedded_interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..91bfdded0e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 
+1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..1fc0fe4202 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..1581716962 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..1b0d7b4792 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..914f2088e5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu new 
file mode 100644 index 0000000000..c9bc56d2ee --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_true_false", + 
embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..5089982bd9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..8332c68c41 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_16_true_false", + 
embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..93cc64df3c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..19a66eccdb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..8e712a4dea --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..d9cf9037de --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_16_true_false", + embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..55d904bd1c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_16_true_true", + embedded_interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..310294888a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_16_true_true", + embedded_interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..0b875f9278 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_16_true_true", + embedded_interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..f8a526cecc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ 
-0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + 
"interleaved_scan_kernel_256_16_true_true", + embedded_interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..27892d3882 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..1bbbea185f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..b2f3329df8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..4fbe8473c0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..fc84311858 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 
0000000000..4756b6afe1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..9a28d4df87 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..4c188e2f37 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..4d9dddd0f1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..5c5fef9b1e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e20df45ffc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..e23e98b699 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..aa71191abd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..bd755c5131 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..4aa45ac61b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..5bbbe357d0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..58f80c0744 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 
100644 index 0000000000..a51683b078 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + 
embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..a45d054588 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..d787a38d89 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..993c049805 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..04d501aaba --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id, + 
sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..ecf6bfd879 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..6a672aa224 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_false_false", + embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..51f7020bbf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_false_true", + embedded_interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..df022fc329 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_false_true", + embedded_interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..9c6e78d06c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_false_true", + embedded_interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..e791b5f434 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_false_true", + embedded_interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose, + 
sizeof(embedded_interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..302c981f8c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_false_true", + embedded_interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..c596a720d5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_false_true", + embedded_interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..47d446651e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_false_true", + embedded_interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..03a8571341 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 
@@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_false_true", + 
embedded_interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..2565dcc23b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_true_false", + embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..ef7c625f19 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_true_false", + embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu 
new file mode 100644 index 0000000000..4a3e67d690 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_true_false", + 
embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..7d249e12c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_true_false", + embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..4c852c3574 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_true_false", + embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..770fa60d3b --- 
/dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  float,
+  float,
+  long,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>,
+  raft::identity_op>(
+  cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>,
+  raft::identity_op,
+  unsigned int,
+  float const*,
+  unsigned int const*,
+  float const* const*,
+  unsigned int const*,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int const*,
+  unsigned int,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  unsigned int*,
+  float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id()
+{
+  registerAlgorithm,
+                    cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>,
+                    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu
new file mode 100644
index 0000000000..6221cd3450
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  __half,
+  __half,
+  long,
+  cuvs::neighbors::filtering::
+    ivf_to_sample_filter>,
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+  raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+                     raft::identity_op,
+                     unsigned int,
+                     __half const*,
+                     unsigned int const*,
+                     __half const* const*,
+                     unsigned int const*,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int const*,
+                     unsigned int,
+                     cuvs::neighbors::filtering::ivf_to_sample_filter<
+                       long,
+                       cuvs::neighbors::filtering::bitset_filter>,
+                     unsigned int*,
+                     float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id()
+{
+  registerAlgorithm<__half,
+                    __half,
+                    long,
+                    cuvs::neighbors::filtering::ivf_to_sample_filter<
+                      long,
+                      cuvs::neighbors::filtering::bitset_filter>,
+                    cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+                    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu
new file mode 100644
index 0000000000..8a7691e777
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  __half,
+  __half,
+  long,
+  cuvs::neighbors::filtering::
+    ivf_to_sample_filter>,
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+  raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+                 raft::sqrt_op,
+                 unsigned int,
+                 __half const*,
+                 unsigned int const*,
+                 __half const* const*,
+                 unsigned int const*,
+                 unsigned int,
+                 unsigned int,
+                 unsigned int,
+                 unsigned int,
+                 unsigned int const*,
+                 unsigned int,
+                 cuvs::neighbors::filtering::ivf_to_sample_filter<
+                   long,
+                   cuvs::neighbors::filtering::bitset_filter>,
+                 unsigned int*,
+                 float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt()
+{
+  registerAlgorithm<__half,
+                    __half,
+                    long,
+                    cuvs::neighbors::filtering::ivf_to_sample_filter<
+                      long,
+                      cuvs::neighbors::filtering::bitset_filter>,
+                    cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+                    raft::sqrt_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu
new file mode 100644
index 0000000000..07a5bee770
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  __half,
+  __half,
+  long,
+  cuvs::neighbors::filtering::
+    ivf_to_sample_filter>,
+  cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>,
+  raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>,
+                     raft::identity_op,
+                     unsigned int,
+                     __half const*,
+                     unsigned int const*,
+                     __half const* const*,
+                     unsigned int const*,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int const*,
+                     unsigned int,
+                     cuvs::neighbors::filtering::ivf_to_sample_filter<
+                       long,
+                       cuvs::neighbors::filtering::bitset_filter>,
+                     unsigned int*,
+                     float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id()
+{
+  registerAlgorithm<__half,
+                    __half,
+                    long,
+                    cuvs::neighbors::filtering::ivf_to_sample_filter<
+                      long,
+                      cuvs::neighbors::filtering::bitset_filter>,
+                    cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>,
+                    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu
new file mode 100644
index 0000000000..e88fcd9729
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  __half,
+  __half,
+  long,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+  raft::identity_op>(
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+  raft::identity_op,
+  unsigned int,
+  __half const*,
+  unsigned int const*,
+  __half const* const*,
+  unsigned int const*,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int const*,
+  unsigned int,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  unsigned int*,
+  float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id()
+{
+  registerAlgorithm<__half,
+                    __half,
+                    long,
+                    cuvs::neighbors::filtering::
+                      ivf_to_sample_filter,
+                    cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+                    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..55aae51950 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  __half,
+  __half,
+  long,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+  raft::sqrt_op>(
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+  raft::sqrt_op,
+  unsigned int,
+  __half const*,
+  unsigned int const*,
+  __half const* const*,
+  unsigned int const*,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int const*,
+  unsigned int,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  unsigned int*,
+  float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt()
+{
+  registerAlgorithm<__half,
+                    __half,
+                    long,
+                    cuvs::neighbors::filtering::
+                      ivf_to_sample_filter,
+                    cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>,
+                    raft::sqrt_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu
new file mode 100644
index 0000000000..99c4f33b58
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  __half,
+  __half,
+  long,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>,
+  raft::identity_op>(
+  cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>,
+  raft::identity_op,
+  unsigned int,
+  __half const*,
+  unsigned int const*,
+  __half const* const*,
+  unsigned int const*,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int const*,
+  unsigned int,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  unsigned int*,
+  float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id()
+{
+  registerAlgorithm<__half,
+                    __half,
+                    long,
+                    cuvs::neighbors::filtering::
+                      ivf_to_sample_filter,
+                    cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>,
+                    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu
new file mode 100644
index 0000000000..ff4da51ccd
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  signed char,
+  int,
+  long,
+  cuvs::neighbors::filtering::
+    ivf_to_sample_filter>,
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+  raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+                     raft::identity_op,
+                     unsigned int,
+                     signed char const*,
+                     unsigned int const*,
+                     signed char const* const*,
+                     unsigned int const*,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int const*,
+                     unsigned int,
+                     cuvs::neighbors::filtering::ivf_to_sample_filter<
+                       long,
+                       cuvs::neighbors::filtering::bitset_filter>,
+                     unsigned int*,
+                     float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id()
+{
+  registerAlgorithm>,
+                    cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+                    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu
new file mode 100644
index 0000000000..91a073d140
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  signed char,
+  int,
+  long,
+  cuvs::neighbors::filtering::
+    ivf_to_sample_filter>,
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+  raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+                 raft::sqrt_op,
+                 unsigned int,
+                 signed char const*,
+                 unsigned int const*,
+                 signed char const* const*,
+                 unsigned int const*,
+                 unsigned int,
+                 unsigned int,
+                 unsigned int,
+                 unsigned int,
+                 unsigned int const*,
+                 unsigned int,
+                 cuvs::neighbors::filtering::ivf_to_sample_filter<
+                   long,
+                   cuvs::neighbors::filtering::bitset_filter>,
+                 unsigned int*,
+                 float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt()
+{
+  registerAlgorithm>,
+                    cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+                    raft::sqrt_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu
new file mode 100644
index 0000000000..7bdd72fd4a
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  signed char,
+  int,
+  long,
+  cuvs::neighbors::filtering::
+    ivf_to_sample_filter>,
+  cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>,
+  raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>,
+                     raft::identity_op,
+                     unsigned int,
+                     signed char const*,
+                     unsigned int const*,
+                     signed char const* const*,
+                     unsigned int const*,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int,
+                     unsigned int const*,
+                     unsigned int,
+                     cuvs::neighbors::filtering::ivf_to_sample_filter<
+                       long,
+                       cuvs::neighbors::filtering::bitset_filter>,
+                     unsigned int*,
+                     float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id()
+{
+  registerAlgorithm>,
+                    cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>,
+                    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu
new file mode 100644
index 0000000000..e2dfc52ae6
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  signed char,
+  int,
+  long,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+  raft::identity_op>(
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+  raft::identity_op,
+  unsigned int,
+  signed char const*,
+  unsigned int const*,
+  signed char const* const*,
+  unsigned int const*,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int const*,
+  unsigned int,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  unsigned int*,
+  float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id()
+{
+  registerAlgorithm,
+                    cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+                    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu
new file mode 100644
index 0000000000..961f1d677e
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  signed char,
+  int,
+  long,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+  raft::sqrt_op>(
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+  raft::sqrt_op,
+  unsigned int,
+  signed char const*,
+  unsigned int const*,
+  signed char const* const*,
+  unsigned int const*,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int const*,
+  unsigned int,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  unsigned int*,
+  float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt()
+{
+  registerAlgorithm,
+                    cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>,
+                    raft::sqrt_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu
new file mode 100644
index 0000000000..5205a223dd
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  signed char,
+  int,
+  long,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>,
+  raft::identity_op>(
+  cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>,
+  raft::identity_op,
+  unsigned int,
+  signed char const*,
+  unsigned int const*,
+  signed char const* const*,
+  unsigned int const*,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int const*,
+  unsigned int,
+  cuvs::neighbors::filtering::ivf_to_sample_filter,
+  unsigned int*,
+  float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id()
+{
+  registerAlgorithm,
+                    cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>,
+                    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu
new file mode 100644
index 0000000000..b7b2526d3b
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// One interleaved_scan_kernel variant: explicit instantiation under BUILD_KERNEL, load-time registration otherwise.
+#include "../ivf_flat_interleaved_scan.cuh"
+
+#ifdef BUILD_KERNEL  // kernel pass: compile the explicit instantiation below
+// NOTE(review): some template argument lists in this file look truncated in this copy of the patch; confirm against the original file.
+template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<
+  256,
+  1,
+  true,
+  false,
+  unsigned char,
+  unsigned int,
+  long,
+  cuvs::neighbors::filtering::
+    ivf_to_sample_filter>,
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>,
+  raft::identity_op>(
+  cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>,
+  raft::identity_op,
+  unsigned int,
+  unsigned char const*,
+  unsigned int const*,
+  unsigned char const* const*,
+  unsigned int const*,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int,
+  unsigned int const*,
+  unsigned int,
+  cuvs::neighbors::filtering::
+    ivf_to_sample_filter>,
+  unsigned int*,
+  float*);
+
+#else  // registration pass: nothing is instantiated; the embedded_* object is registered instead
+
+#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.h"
+#include  // NOTE(review): include operand is missing in this copy of the patch; confirm against the original file
+
+__attribute__((__constructor__)) static void  // constructor attribute: run automatically before main()
+register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id()
+{
+  registerAlgorithm<
+    unsigned char,
+    unsigned int,
+    long,
+    cuvs::neighbors::filtering::
+      ivf_to_sample_filter>,
+    cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>,
+    raft::identity_op>(
+    "interleaved_scan_kernel_256_1_true_false",  // name argument; mirrors the leading <256, 1, true, false> template arguments
+    embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id,
+    sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id));
+}
+
+#endif  // BUILD_KERNEL
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu
new file mode 100644
index 0000000000..6f9e638ef2
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_true_false", + embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..7b18b874ff --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_true_false", + embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..deb65e968e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_true_false", + embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..a7b59706d3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_1_true_false", + embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..9bb5161ab2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_256_1_true_false", + embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..6272eb87c8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_true_true", + embedded_interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..2c5edd6258 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_true_true", + embedded_interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..b1cd6f2eb6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_true_true", + embedded_interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..898783af74 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_true_true", + embedded_interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..4ff2af5fc0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_true_true", + embedded_interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..95f4a432ca --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_true_true", + embedded_interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..4fc319d843 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_true_true", + 
embedded_interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..0873fa7234 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_1_true_true", + embedded_interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..1930040548 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright 
(c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_4_false_false", + embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..edfb18876a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_4_false_false", + embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..2851a535e7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_4_false_false", + embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..324f0e0066 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + 
raft::identity_op>( + "interleaved_scan_kernel_256_4_false_false", + embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..71fd238fa6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_4_false_false", + embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..9e41fc3142 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_4_false_false", + embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 
0000000000..916dc598ed --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_4_false_true", + 
embedded_interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..042ae90f1e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_4_false_true", + embedded_interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..1ea25107f3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_4_true_false", + embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..623e7fcec2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_4_true_false", + embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..adf12e96c2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_4_true_false", + embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..5b42d70628 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_4_true_false", + embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 
0000000000..b7952ac332 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_4_true_false", + embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..de5336f36a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_256_4_true_false", + embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..34fd019ddc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_4_true_true", + embedded_interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..7a5aba65b1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_4_true_true", + embedded_interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..302773ac7c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_8_false_false", + embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..5ed74f57f4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_8_false_false", + embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..f6bb6cc680 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_8_false_false", + embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..75fdc6980b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_8_false_false", + embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..61c94df89f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_8_false_false", + embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..c889f57f48 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_8_false_false", + embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..6abbca5f4d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, 
+ cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_8_false_true", + embedded_interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..5e4ceb3a6c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_8_false_true", + embedded_interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..7b56cd27ac --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_8_true_false", + embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id, + 
sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..2a6accb3db --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_8_true_false", + embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..577d60c51a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_8_true_false", + embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id, + 
sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..384cca6d9c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_8_true_false", + embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..fcca5a5df7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_256_8_true_false", + embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..5ee3dc5eb8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_256_8_true_false", + embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..3b75ca6b4a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_8_true_true", + embedded_interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..f04373839e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 256, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_256_8_true_true", + embedded_interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..851dafea08 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..326d16a5b3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..03b0fb1094 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..84b62997e8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..4592a42bfc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu new file 
mode 100644 index 0000000000..e583c98a16 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_false_false", + 
embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..e4399ffeed --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..0f8c6a6539 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..eba35b2feb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..74210afadf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..112c5ceb3b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..c7c9150318 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_false_false", + embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..e09a83dbf5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_16_false_true", + embedded_interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..9c28eee47a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_16_false_true", + embedded_interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..657cd8a30f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_16_false_true", + embedded_interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..5acaf35711 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 
@@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_16_false_true", + 
embedded_interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..b9eeced6c7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..a11aa58461 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..ca7f591f82 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..9d7964db0e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new 
file mode 100644 index 0000000000..3fad4569a6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_16_true_false", + 
embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..6a41f637cf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..b657a3054b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..de0305103d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..ff95ad66b5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..85a07e6c1f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..b33bf7dfaa --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..df8b79da7d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_16_true_false", + embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..12430d9ccf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_16_true_true", + embedded_interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..f34d479777 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_16_true_true", + embedded_interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..d3def5e132 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_16_true_true", + 
embedded_interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..4c1918727d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_16_true_true", + embedded_interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..3b1a844eb3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..df6a7c7bbf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..e5577edebc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..dad28040c7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (float data, euclidean_dist<1, float, float>, raft::identity_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( +
"interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..0c40e25845 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (float data, euclidean_dist<1, float, float>, raft::sqrt_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..7e524ddf4f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (float data, inner_prod_dist<1, float, float>, raft::identity_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..bb3db05e08 --- /dev/null
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (__half data, euclidean_dist<1, __half, __half>, raft::identity_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, +
raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..2e736f9dbd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (__half data, euclidean_dist<1, __half, __half>, raft::sqrt_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..32f734cad3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (__half data, inner_prod_dist<1, __half, __half>, raft::identity_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..052119a7cb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (__half data, euclidean_dist<1, __half, __half>, raft::identity_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..71400a0303 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (__half data, euclidean_dist<1, __half, __half>, raft::sqrt_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..b14d644983 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (__half data, inner_prod_dist<1, __half, __half>, raft::identity_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..99d128ea01 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (signed char data, euclidean_dist<1, signed char, int>, raft::identity_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..5570d00fda --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (signed char data, euclidean_dist<1, signed char, int>, raft::sqrt_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, +
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..3932281de2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (signed char data, inner_prod_dist<1, signed char, int>, raft::identity_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..7fea0189db --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (signed char data, euclidean_dist<1, signed char, int>, raft::identity_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode
100644 index 0000000000..496c158434 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* BUILD_KERNEL pass: explicit instantiation of the interleaved_scan_kernel specialization (signed char data, euclidean_dist<1, signed char, int>, raft::sqrt_op) */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else /* non-BUILD_KERNEL pass: constructor-attribute hook hands the embedded_* symbol (presumably declared in the header included below) and its sizeof to registerAlgorithm. NOTE(review): the bare #include and some template-argument lists look truncated here; confirm against the generator output. */ + +#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_false_false", +
embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif /* BUILD_KERNEL */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..65c152c081 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..4d7653d46d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..96a79e0c64 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..aeadb7ec54 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..7b4757c326 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..7aff0d2a18 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..ce6f039b3d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_false_false", + embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..df68427895 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_false_true", + embedded_interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..68458a3f35 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_false_true", + embedded_interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..b4e685fc87 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_false_true", + embedded_interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..dc30eb63a8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_false_true", + embedded_interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..b956b82daf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_false_true", + embedded_interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..428ad7e950 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_false_true", + embedded_interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..9b22f6c4ba --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_false_true", + 
embedded_interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..3418b4c604 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_false_true", + embedded_interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..5072ea8489 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, 
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..37769c2376 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..56a3448df1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..0f34bc8104 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..af5984cacb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..126aa6791a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..6404610b13 --- /dev/null +++ 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..699a2d42dd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..c38e2b0ed7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..d7f5c0d6b4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..5a00e165b3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..2a639b3d28 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..d7d3984312 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..61ad677621 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..4819f16668 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm< + signed char, + int, + long, + 
cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>("interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..e5283b327e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..84ab7877b8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 
0000000000..0aa0e7b30d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..8d0be68864 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..c066fbada5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..09d64286ee --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..b384a02bad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id, + 
sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..c28d6bd6a2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..cc4d935699 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_2_1_true_false", + embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..906814d41a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_true_true", + embedded_interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..8eb5c381e1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_true_true", + embedded_interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..ae8bc90488 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_true_true", + embedded_interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..9e3b46217d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_true_true", + embedded_interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..5e017873dc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_true_true", + embedded_interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..2451e28019 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_true_true", + embedded_interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..146a0eb20f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_true_true", + 
embedded_interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..6e0b62afe0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_1_true_true", + embedded_interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..7ded12cdd6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_2_4_false_false", + embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..9204daab66 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_4_false_false", + embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..969c9f3ee2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_2_4_false_false", + embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..4f736cd922 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_2_4_false_false", + embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..80f6cfe7bd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_4_false_false", + embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..2eed038108 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_2_4_false_false", + embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..4021ac8966 --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_4_false_true", + 
embedded_interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..1e15f114fb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_4_false_true", + embedded_interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..70c8ee1e6a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_2_4_true_false", + embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
new file mode 100644 index 0000000000..395872dd06 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_4_true_false", + 
embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..9e4ae1f7c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_2_4_true_false", + embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..6bb0142b9f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_2_4_true_false", + embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..bfea159adc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + 
"interleaved_scan_kernel_2_4_true_false", + embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..fe52b2dc8b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_2_4_true_false", + embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..a48306da96 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_4_true_true", + embedded_interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..56d0031442 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, 
float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_4_true_true", + embedded_interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..2f89ecd349 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_8_false_false", + embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..15b0b99703 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_8_false_false", + embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..6011add826 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_2_8_false_false", + embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..64655ed240 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_8_false_false", + embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..aa125344d6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_8_false_false", + embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..6977c22672 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_8_false_false", + embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..3472906a7a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_8_false_true", + embedded_interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..4059e7f7aa --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_8_false_true", + embedded_interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..3ffc972145 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_8_true_false", + embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id, + 
sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..a2d8a0017c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_8_true_false", + embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..1f5e5ef8eb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_2_8_true_false", + embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..250f5ba2fb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_8_true_false", + embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..e0a21e5844 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_2_8_true_false", + embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..639863a249 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_2_8_true_false", + embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..b68da192b7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_8_true_true", + embedded_interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..deb97e906b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 2, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_2_8_true_true", + embedded_interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose)); +} 
+ +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..745d6066c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..a2a1aa9c6b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..891582bab8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..bce4e73467 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..e6f862dc87 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..f5b62cc4a5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu new 
file mode 100644 index 0000000000..db123f8801 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..7505c39c6f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..5843f0b453 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..7d70eddfa7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..a746a02ff7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..607b20ffc4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_false_false", + embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..7d1cb0e9fc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_16_false_true", + embedded_interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..46666f348c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_16_false_true", + embedded_interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..c86a738598 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_16_false_true", + 
embedded_interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..73f62fdab1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_16_false_true", + embedded_interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..74ddf7f2f9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ 
+/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id)); +} + 
+#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..40488d2241 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..f4dafe3b38 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..28b7639880 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..eab5239a40 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu new file 
mode 100644 index 0000000000..195d356657 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_true_false", + 
embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..87d40dab4b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..1cec12a330 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..720f30229b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..54510be430 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..cfc3f6dff6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..45eb046fea --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_16_true_false", + embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..c188fd475d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_16_true_true", + embedded_interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..c5baabba3f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_16_true_true", + embedded_interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..0f779800ba --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_16_true_true", + embedded_interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..961b4706e6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 
@@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_16_true_true", + 
embedded_interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..20ebf95311 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e1f94ae722 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu 
new file mode 100644 index 0000000000..49cf8aa86f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + 
embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..4e81abbb92 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e8b07bbaf4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..633bdf3dd4 --- 
/dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..faf17652c1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..9dc77b44fa --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..7eb6621c05 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..e2f0627bcb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e5ba767b80 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..b1333e2245 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..7aa2dd5c1c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..c990505ef0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..c62d960518 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..74db5c79c9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed 
char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e4f39d3438 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..0976c9ebd8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 
index 0000000000..f8eac8eabe --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e0a1c292c2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..b03f77a8a3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..a99ff9843e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..955d462abe --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..80f260c6be --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_false_false", + embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..167e921cb3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_false_true", + embedded_interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..e8a2e2225b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_false_true", + embedded_interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..fc4e2199c1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_false_true", + embedded_interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..3949bfcf4f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_false_true", + embedded_interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..b4274a2a1f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_false_true", + embedded_interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..c048d1443c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_false_true", + embedded_interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..e369ef6a01 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_false_true", + 
embedded_interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..712a540289 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_false_true", + embedded_interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..692131e2dc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..9a63fd7998 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..d627ec343d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..51bcea7219 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..3992fb774c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..6a071f31a0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..9c0f05192e --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..15153f81b8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..7706ae6b11 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..f00a7d4bff --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..20280de2c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..7d23b648f8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..8b9f9f10dc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..f8e93185b0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..981b3627b4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..33b657a4ac --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 
100644 index 0000000000..e48f6a7148 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_true_false", + 
embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..6b3f51dfb8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..6b691e3f61 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..7ccc23cefa --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..0dc495da2d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..4aaff1d29e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..ae9e2ab347 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..9c42d89630 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_32_1_true_false", + embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..655488bc0e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_true_true", + embedded_interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..c7d11cff9c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_true_true", + embedded_interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..505529a024 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_true_true", + embedded_interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..82f2ba84fd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_true_true", + embedded_interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..0f7d69328b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_true_true", + embedded_interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..b3543f0629 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_true_true", + embedded_interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..4121b3bb68 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_true_true", + 
embedded_interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..fdfce5ade6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_1_true_true", + embedded_interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..666beb4ff0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, 
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_4_false_false", + embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..d573227b4a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_4_false_false", + embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..5a8d8b9380 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_4_false_false", + embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu 
new file mode 100644 index 0000000000..ca6b2e37b4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_4_false_false", + 
embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..5326c9c85b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_4_false_false", + embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..2943f6f0e8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_4_false_false", + embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..e2a6ea97f3 --- 
/dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_4_false_true", + 
embedded_interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..5c66a8a258 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_4_false_true", + embedded_interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..1fac0fc6df --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_4_true_false", + embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..20e99961ad --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_4_true_false", + embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..e2ca3078b0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_32_4_true_false", + embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..50da6d130b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_4_true_false", + embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..e232731e65 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + 
"interleaved_scan_kernel_32_4_true_false", + embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..03012dcb4f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_32_4_true_false", + embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..059750251f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_4_true_true", + embedded_interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..d3c359fb87 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_4_true_true", + embedded_interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..b43f3154a5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_8_false_false", + embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..a6b5fd2548 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_8_false_false", + embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..bc69abe2ea --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_8_false_false", + embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..32ad26e987 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_8_false_false", + embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..e5eaa4f2d3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_8_false_false", + embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..fe96f555bc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_8_false_false", + embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..bbe9f9fc88 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_8_false_true", + embedded_interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..8b8c378154 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_8_false_true", + embedded_interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..fdff0f312f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_8_true_false", + embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id, + 
sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..ad3f5de3eb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_8_true_false", + embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..e0fbf9e783 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_32_8_true_false", + embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..380f09c160 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_8_true_false", + embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..f3178fe37f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_32_8_true_false", + embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..a27f7186bb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_32_8_true_false", + embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..91b64add4f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_8_true_true", + embedded_interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..09574ccb96 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 32, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_32_8_true_true", + embedded_interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..763b0b2b5a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..939c9709f1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..8436db548a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..921de1a4df --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..6622d3c9ca --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu new file 
mode 100644 index 0000000000..58a437e400 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_false_false", + 
embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..ecda14812a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..97df6c86d6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..0e46fa23c7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..4e37e49af1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..7685ed1706 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..6a97de5b3f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_false_false", + embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..695bc50e41 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_16_false_true", + embedded_interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..64b9d7fc3e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_16_false_true", + embedded_interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..76f787b727 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_16_false_true", + embedded_interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..0a2e4b2b76 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 
@@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_16_false_true", + 
embedded_interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..3ffe23abf4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..d9dc2c15df --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..4f965114be --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..facf2bc00d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new 
file mode 100644 index 0000000000..a8c2f0879a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_16_true_false", + 
embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..8fd79f92f4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..e02ed08720 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..40d0985180 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..ce6575eed3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..3d33a1ae90 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..b493264e1c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..ab76b4239a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_16_true_false", + embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..0696e1409b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_16_true_true", + embedded_interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..272dac4ad4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_16_true_true", + embedded_interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..53c57a5cb9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_16_true_true", + 
embedded_interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..6d1870445e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_16_true_true", + embedded_interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..2f289a8ded --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..c2cd6e34a2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..337d99a1c6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..17377d2b5d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..67144e5dee --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..8a0d55edf9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..e7740674c0 --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..3fda2a8d91 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..ad54b4d903 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..cb220fcc28 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..b958645eef --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..143afbc924 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..4bcb94b825 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..f35dd50b86 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..266168e29d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..ff0d8f312b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 
100644 index 0000000000..1592a020e9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_false_false", + 
embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..bc0c1035f8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..e3e33db5c1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..01798fcd83 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..b2976af3ba --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..47daa2c1dc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..98cc30fe49 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..3df7b36c1c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_false_false", + embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..07e8eb3149 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_false_true", + embedded_interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..4d31877a9c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_false_true", + embedded_interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..59560e17a5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_false_true", + embedded_interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..8458b8fd97 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_false_true", + embedded_interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..652b21f67e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_false_true", + embedded_interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..ff36ea3f1d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_false_true", + embedded_interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..a08c7548a4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_false_true", + 
embedded_interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..e6b89c53fc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_false_true", + embedded_interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..f6efe8349f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, 
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..4acfa185b9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..21dd415863 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..b897e5700c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..c0ea7501ca --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..d68c4d351b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..f525f7a18c --- /dev/null +++ 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..0fc3f37dce --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..e5d57123c6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..235142e24d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..09364c0c63 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..0b1ac5b3f8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..497421e3d7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..8de775753e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..878dc72ce2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm< + signed char, + int, + long, + 
cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>("interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..a7a15502d7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..b791076b4c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 
0000000000..8d7b041099 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..fe1bf30af3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..915b48f6e6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..65b01a3598 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..a7b4e04edc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id, + 
sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..8c0a8ed1d0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..b017f6a277 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_4_1_true_false", + embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..c5ba80e29d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_true_true", + embedded_interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..d9ec96f967 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_true_true", + embedded_interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..19ac034898 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_true_true", + embedded_interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..f1090aef2c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_true_true", + embedded_interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..e5f104793d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_true_true", + embedded_interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..7a2fee4d48 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_true_true", + embedded_interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..f100539318 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_true_true", + 
embedded_interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..0144a4dcff --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_1_true_true", + embedded_interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..b63104d923 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_4_4_false_false", + embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..146d228c9c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_4_false_false", + embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..23bd49d772 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_4_4_false_false", + embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..03054e2e72 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_4_4_false_false", + embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..6c7549b2d3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_4_false_false", + embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..3da31a31e3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_4_4_false_false", + embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..2ce228fbaa --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_4_false_true", + 
embedded_interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..b286ec543d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_4_false_true", + embedded_interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..5044092464 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_4_4_true_false", + embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
new file mode 100644 index 0000000000..116446d53c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_4_true_false", + 
embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..fc10620cf6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_4_4_true_false", + embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..e477e78b37 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_4_4_true_false", + embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..25a8b28551 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + 
"interleaved_scan_kernel_4_4_true_false", + embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..03d7eaac1f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_4_4_true_false", + embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..ccc5bcb936 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_4_true_true", + embedded_interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..3da6296356 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, 
float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_4_4_true_true", + embedded_interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..4e074b0f24 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_8_false_false", + embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..86ac6b61db --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_8_false_false", + embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..9c1bdad603 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_4_8_false_false", + embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..e2a429172e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_8_false_false", + embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..79bd8c94a7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_8_false_false", + embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..18e744f6c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_8_false_false", + embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..6f1b7867d5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, +
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>>( + "interleaved_scan_kernel_4_8_false_true", + embedded_interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..fbf1e96e12 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>>( + "interleaved_scan_kernel_4_8_false_true", + embedded_interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..66c3139943 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_8_true_false", + embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id, +
sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..02a624edf2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_8_true_false", + embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..3cda9a0cae --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_4_8_true_false", + embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..a7165608a2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_8_true_false", + embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..04302f2eb4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_4_8_true_false", + embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..28fcb4ed12 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::none_sample_filter>, +
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_4_8_true_false", + embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..b5d5d5b43d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>>( + "interleaved_scan_kernel_4_8_true_true", + embedded_interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..fa9bd46c1f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 4, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter<long, + cuvs::neighbors::filtering::none_sample_filter>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::none_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op<raft::plug_const_op<float, raft::add_op>, + raft::plug_const_op<float, raft::mul_op>>>( + "interleaved_scan_kernel_4_8_true_true", + embedded_interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose)); +}
+ +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..83de96f840 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL /* emit only the explicit instantiation */ + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter<long, cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + unsigned int*, + float*); + +#else /* register the embedded_* array at load time (presumably this kernel's image) */ + +#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include <cuvs/detail/jit_lto/RegisterKernelFragment.h> + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm<signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter<unsigned int, long>>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..504d51a4b2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..cf0ecf5eef --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..14b8a6deef --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..6e0fe1e1ef --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..d741bdf9a3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu new 
file mode 100644 index 0000000000..e191d36354 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..f00e593299 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..7a73fa2364 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..d073c5bdd1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..6504458c4c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..024489ce7d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_false_false", + embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..f5c6838d2d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_16_false_true", + embedded_interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..2d437c349b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_16_false_true", + embedded_interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..6919f2f6c6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_16_false_true", + 
embedded_interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..23a2baeff0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_16_false_true", + embedded_interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..4f23f0e822 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ 
+/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id)); +} + 
+#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..3bcadc4c8e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..62c11742a1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..c2efc348f9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..b6e8b9832f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu new file 
mode 100644 index 0000000000..6df25297da --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_true_false", + 
embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..db5813e84e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..072fa51ba4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..2bb058c11a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..cd16f44adb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..00b518a587 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..54b90ba848 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_16_true_false", + embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..9521f519c8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_16_true_true", + embedded_interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..54f2a6816b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_16_true_true", + embedded_interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..74d84fde79 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_16_true_true", + embedded_interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..66b555bc1c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 
@@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_16_true_true", + 
embedded_interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..aca61f47d6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..37277e3580 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu 
new file mode 100644 index 0000000000..8ade867da9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + 
embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..e10d1d4344 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..0d5e698d3f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..8830a301a6 --- 
/dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..aaf86661f3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..e6146ad03f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..2821e04e16 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..f405eea8a4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..8f20aee192 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..a51a2cccba --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..fa0d64ea9f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..21ae76eade --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..a65523a598 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..fe0921ebd0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed 
char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..b01b7256a6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..327b7d1792 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 
index 0000000000..c7b0d7db6d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..4b82034013 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..e0639772a6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..0d14b99b43 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..a1cb5204a5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..761b1b84f9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_false_false", + embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..2fb9304a71 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_false_true", + embedded_interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..7377259f3c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_false_true", + embedded_interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..8491937edd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_false_true", + embedded_interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..17e39882b2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_false_true", + embedded_interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..bb13554ae8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_false_true", + embedded_interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..4586b84807 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_false_true", + embedded_interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..539cf86101 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_false_true", + 
embedded_interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..555f0a0094 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_false_true", + embedded_interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..8eae2e2295 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..ec32b38101 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..5bbae73f63 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..afdfd9ee5a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..6e1ae09b17 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..24052094e5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..1d8e98431a --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..acbb5a4716 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..307982d41c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..a7d41d73fd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..fe6a60aa08 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..32b223f46d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..c4685af733 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..c16b8b5e5b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..f821f96f29 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..a2e661a4f0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 
100644 index 0000000000..60059dacd9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_true_false", + 
embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..51e0369588 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..1bf2c3cfc7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..1da8e0aaa0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..25bfcfe03c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..07a8ab4fac --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..21da741531 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..3b54a48e11 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_64_1_true_false", + embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..6336391044 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_true_true", + embedded_interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..88c46b76ab --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_true_true", + embedded_interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..ba7f1c6995 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_true_true", + embedded_interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..84c51bd68e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_true_true", + embedded_interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..8e4842998e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_true_true", + embedded_interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..5e29122033 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_true_true", + embedded_interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..8e80e9be05 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_true_true", + 
embedded_interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..53b3dc1a87 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_1_true_true", + embedded_interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..acfd708f76 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, 
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_4_false_false", + embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..d032ddcc4e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_4_false_false", + embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..19631506de --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_4_false_false", + embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu 
new file mode 100644 index 0000000000..3c6a2acc43 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_4_false_false", + 
embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..4bd3838e99 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_4_false_false", + embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..40c7704a1e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_4_false_false", + embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..1342bfad21 --- 
/dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_4_false_true", + 
embedded_interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..f9e2b8d098 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_4_false_true", + embedded_interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..8323e30c3e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_4_true_false", + embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..480c08c544 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_4_true_false", + embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..20ae98adf2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_64_4_true_false", + embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..b22a3812e1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_4_true_false", + embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..7e92ddd425 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + 
"interleaved_scan_kernel_64_4_true_false", + embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..e1cebc0fe2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_64_4_true_false", + embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..2730174bb3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_4_true_true", + embedded_interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..ee06b163d6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_4_true_true", + embedded_interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..091618ef69 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_8_false_false", + embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..01cdac1563 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_8_false_false", + embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..0cec38f6c3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_8_false_false", + embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..78a63322f0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_8_false_false", + embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..19820c1856 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_8_false_false", + embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..9596b2b7f0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_8_false_false", + embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..ead0281476 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_8_false_true", + embedded_interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..408bf0abe1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_8_false_true", + embedded_interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..f924a0e21c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_8_true_false", + embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id, + 
sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..a5b2f0a835 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_8_true_false", + embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..b616e846f0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_64_8_true_false", + embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..5f464f40ee --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_8_true_false", + embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..9fe52ce048 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_64_8_true_false", + embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..09e8cc46fc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_64_8_true_false", + embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..1f748a0545 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_8_true_true", + embedded_interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..8e94466e06 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 64, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_64_8_true_true", + embedded_interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..9080e4fd3d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..8924196073 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..e2f65450fc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..676dcd4c04 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..a73c9610dc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu new file 
mode 100644 index 0000000000..dcc8a50e69 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_false_false", + 
embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..af16b128c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..e562b24c99 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..598f0fa9ab --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..a483ca9a04 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id, + 
sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..533cecce98 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..61321f7b85 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_false_false", + embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..ef35cab046 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_16_false_true", + embedded_interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..7ffd93cef8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_16_false_true", + embedded_interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..053970a47c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_16_false_true", + embedded_interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..9be78447bc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 
@@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_16_false_true", + 
embedded_interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..0efbf00872 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..4c3197e321 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu new file mode 100644 index 0000000000..7fd65caf58 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..19c3f6d59c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu new 
file mode 100644 index 0000000000..fa8a8e4843 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_16_true_false", + 
embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu new file mode 100644 index 0000000000..7b9a6bac5b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu new file mode 100644 index 0000000000..6d823854ae --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..448e8d9468 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu new file mode 100644 index 0000000000..5fe822da79 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id, + 
sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu new file mode 100644 index 0000000000..41d6a505a5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu new file mode 100644 index 0000000000..bc7400fc8c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu new file mode 100644 index 0000000000..6a996b1e36 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_16_true_false", + embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id, + sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..9ece40adae --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_16_true_true", + embedded_interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..5f63240d6e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_16_true_true", + embedded_interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu new file mode 100644 index 0000000000..a83f27c084 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_16_true_true", + 
embedded_interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu new file mode 100644 index 0000000000..6d71c99e81 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 16, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_16_true_true", + embedded_interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose, + sizeof(embedded_interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..8ebec5238c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 
2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..391fb0d291 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..b08f946298 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..4936d3967b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..bc8be9056f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..680dff882e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..a9f80b78ea --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..43685d4b2e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..d63cf65ff3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..c7f42e5510 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..498c6bb4a2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..adb311a23e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..e01aa05f2d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..28db7f2c57 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..d4a9cee9d9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..1e5e5aa183 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 
100644 index 0000000000..f4350fc264 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_false_false", + 
embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 0000000000..1d68310436 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..fc5d11e4d3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..f8c2d688f3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..bb8d7ad0b5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..f0d0da2442 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..d8d8c180a8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..4c7e75e9cc --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_false_false", + embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..2417a71cbf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_false_true", + embedded_interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..b2f75be725 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_false_true", + embedded_interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..37e001bb23 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_false_true", + embedded_interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..def28cff07 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_false_true", + embedded_interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..b002057d62 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_false_true", + embedded_interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..67897b6a25 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_false_true", + embedded_interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..9a1c1ed047 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_false_true", + 
embedded_interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..ed923f0649 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + false, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_false_true", + embedded_interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..559f409c6b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, 
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..42749db625 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu new file mode 100644 index 0000000000..a31e838d03 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>("interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..d29b19f227 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..60a55a3014 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu new file mode 100644 index 0000000000..d7705ed8d4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..ff25a44c6d --- /dev/null +++ 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + 
raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..3c0469c50e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu new file mode 100644 index 0000000000..1b2cbdaf47 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..19a217b65b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..560653b426 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu new file mode 100644 index 0000000000..fbc10678a4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..39fb1fe146 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..6b0ce53866 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu new file mode 100644 index 0000000000..863b283267 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id() +{ + registerAlgorithm< + signed char, + int, + long, + 
cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>("interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..26ebd4c55e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..2568e34515 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu new file mode 100644 index 
0000000000..1de92668bf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu new file mode 100644 index 0000000000..065e18cf8c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..98d443a8e5 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu new file mode 100644 index 0000000000..9fbeeb93f3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu new file mode 100644 index 0000000000..fd72f2db70 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id, + 
sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu new file mode 100644 index 0000000000..1236530e31 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu new file mode 100644 index 0000000000..27daa156c9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + false, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::identity_op>( + "interleaved_scan_kernel_8_1_true_false", + embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id, + 
sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..5972b2094d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_true_true", + embedded_interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..6f4a0007ed --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_true_true", + embedded_interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..d3feba7c29 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_true_true", + embedded_interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..b6a32d007f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_true_true", + embedded_interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..79af215b41 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_true_true", + embedded_interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..6e12d0d238 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + true, + signed char, + int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + signed char const*, + unsigned int const*, + signed char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_true_true", + embedded_interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu new file mode 100644 index 0000000000..efabdf73f3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_true_true", + 
embedded_interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu new file mode 100644 index 0000000000..bf4d24eca9 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 1, + true, + true, + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + unsigned char const*, + unsigned int const*, + unsigned char const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose() +{ + registerAlgorithm< + unsigned char, + unsigned int, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_1_true_true", + embedded_interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose, + sizeof(embedded_interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..51a1dd6497 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_8_4_false_false", + embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..9124cb21c6 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_4_false_false", + embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..43bd67783c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_8_4_false_false", + embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..f08df71214 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + 
"interleaved_scan_kernel_8_4_false_false", + embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..c3de94d319 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_4_false_false", + embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..5d54a32d0e --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + false, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_8_4_false_false", + embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..17ab3dc3f1 --- /dev/null 
+++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_4_false_true", + 
embedded_interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..b070340f5b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + false, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_4_false_true", + embedded_interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu new file mode 100644 index 0000000000..3eae120f1b --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_8_4_true_false", + embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
new file mode 100644 index 0000000000..f6015566c8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_4_true_false", + 
embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu new file mode 100644 index 0000000000..2c36e5f0ef --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id() +{ + registerAlgorithm< + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>("interleaved_scan_kernel_8_4_true_false", + embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu new file mode 100644 index 0000000000..7a2a19a65d --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_8_4_true_false", + embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id, + sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu new file mode 100644 index 0000000000..0f19eda8b2 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, + raft::sqrt_op>( + 
"interleaved_scan_kernel_8_4_true_false", + embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu new file mode 100644 index 0000000000..08325c1143 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + true, + false, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::identity_op>( + "interleaved_scan_kernel_8_4_true_false", + embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id, + sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu new file mode 100644 index 0000000000..7addbaf8d0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose() +{ + registerAlgorithm>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_4_true_true", + embedded_interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu new file mode 100644 index 0000000000..05b66d8bb0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 4, + true, + true, + float, + float, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + float const*, + unsigned int const*, + float const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose() +{ + registerAlgorithm, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, 
float, float>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_4_true_true", + embedded_interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose, + sizeof(embedded_interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..c64c2177cb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_8_false_false", + embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..b1f0c24f5c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_8_false_false", + embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt, + 
sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..20db7b00de --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_8_8_false_false", + embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..a21aa93306 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_8_false_false", + embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..cbde41dc21 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_8_false_false", + embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..e930a9d7c3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + false, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_8_false_false", + embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..9a2dd381e3 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + 
cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_8_false_true", + embedded_interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..9f229550c8 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + false, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_8_false_true", + embedded_interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu new file mode 100644 index 0000000000..599d8c09bd --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_8_true_false", + embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id, + 
sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..f35185bead --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_8_true_false", + embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu new file mode 100644 index 0000000000..940bda850c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id() +{ + registerAlgorithm< + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>("interleaved_scan_kernel_8_8_true_false", + embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id)); +} + +#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu new file mode 100644 index 0000000000..e669b20128 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_8_true_false", + embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id, + sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu new file mode 100644 index 0000000000..4af434e8ce --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, + raft::sqrt_op>( + "interleaved_scan_kernel_8_8_true_false", + embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt, + sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu new file mode 100644 index 0000000000..a51b29bfcb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + true, + false, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::identity_op>( + "interleaved_scan_kernel_8_8_true_false", + embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id, + sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu new file mode 100644 index 0000000000..a9816599cf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering:: + ivf_to_sample_filter>, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter< + long, + cuvs::neighbors::filtering::bitset_filter>, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_8_true_true", + embedded_interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu new file mode 100644 index 0000000000..9978b5ad4c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ivf_flat_interleaved_scan.cuh" + +#ifdef BUILD_KERNEL + +template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< + 8, + 8, + true, + true, + __half, + __half, + long, + cuvs::neighbors::filtering::ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>, + unsigned int, + __half const*, + unsigned int const*, + __half const* const*, + unsigned int const*, + unsigned int, + unsigned int, + unsigned int, + unsigned int, + unsigned int const*, + unsigned int, + cuvs::neighbors::filtering::ivf_to_sample_filter, + unsigned int*, + float*); + +#else + +#include "interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.h" +#include + +__attribute__((__constructor__)) static void +register_interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose() +{ + registerAlgorithm<__half, + __half, + long, + cuvs::neighbors::filtering:: + ivf_to_sample_filter, + cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, + raft::compose_op, + raft::plug_const_op>>( + "interleaved_scan_kernel_8_8_true_true", + embedded_interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose, + sizeof(embedded_interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose)); +} 
+ +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt new file mode 100644 index 0000000000..e0997394b1 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt @@ -0,0 +1,1280 @@ +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int 
const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, 
unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* 
const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, 
unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, 
int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, 
raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, 
unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, 
unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned 
int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) 
+void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float 
const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, 
raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, 
__half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, 
raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, 
float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, 
raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, 
unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, 
unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed 
char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, 
unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, 
true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, 
unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, 
true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, 
__half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, 
unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float 
const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, 
float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned 
int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, 
unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, true, __half, 
__half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned 
int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, 
raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned 
int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, 
float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, 
raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, 
unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned 
int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned 
int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float 
const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, 
raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, 
unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned 
int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, 
true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, true, 
signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float 
const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, 
unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, 
true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char 
const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, 
float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, 
unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned 
int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, 
float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, 
unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, 
raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, 
raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned 
int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, 
unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, 
float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, 
unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, 
float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, 
unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned 
int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, 
signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned 
int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, 
unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter 
>, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, 
__half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned 
int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned 
int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, 
raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, true, 
float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, 
signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, true, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed 
char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char 
const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, 
unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, 
float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned 
int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, 
unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, 
unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned 
int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned 
char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned 
char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned 
int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, 
float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, 
float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, 
false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, 
unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, 
signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, 
raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, 
unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int 
const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, 
unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* 
const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, 
unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, true, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed 
char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char 
const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, 
unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, 
__half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float 
const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 
1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, 
float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, 
unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, 
unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned 
int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float 
const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, 
signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, 
__half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, 
unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, 
float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, 
float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter 
>, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, 
true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int 
const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, 
float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half 
const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, 
float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 
1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, 
int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned 
char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, 
unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, 
false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, 
__half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed 
char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char 
const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned 
int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, 
unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, 
unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, 
false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, 
raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, true, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned 
int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, 
unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned 
int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, 
unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, 
unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, 
raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, 
raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned 
int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) 
+void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, 
unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, 
raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned 
int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, 
unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned 
int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, __half, __half, 
long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, 
__half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, 
float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, 
int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, 
unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned 
int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 
1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned 
int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned 
char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, 
unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, 
unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, true, unsigned char, 
unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, 
false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed 
char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned 
char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, 
__half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, 
float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned 
int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, float, float, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned 
int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 
1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned 
int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, 
__half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, 
unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* 
const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, float, float, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, 
__half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char 
const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, 
signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned 
char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned 
int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, 
true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned 
int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, 
false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, 
unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int 
const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, 
unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, 
true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, 
unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) +void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp new file mode 100644 index 0000000000..a92ee801c7 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +std::string bool_to_string(bool b) { return b ? 
"true" : "false"; } + +template +struct InterleavedScanPlanner : AlgorithmPlanner { + InterleavedScanPlanner(int Capacity, int Veclen, bool Ascending, bool ComputeNorm) + : AlgorithmPlanner("interleaved_scan_kernel_" + std::to_string(Capacity) + "_" + + std::to_string(Veclen) + "_" + bool_to_string(Ascending) + "_" + + bool_to_string(ComputeNorm), + make_fragment_key()) + { + std::cout << "In the planner" << std::endl; + } +}; diff --git a/dependencies.yaml b/dependencies.yaml index fcb83485f7..4c665c7e24 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -16,6 +16,7 @@ files: - cuda_version - depends_on_cuda_python - depends_on_cupy + - depends_on_libnvjitlink-dev - depends_on_librmm - depends_on_pylibraft - depends_on_nccl @@ -44,6 +45,7 @@ files: - depends_on_cupy - depends_on_pylibraft - depends_on_libcuvs + - depends_on_libnvjitlink-dev - depends_on_librmm - depends_on_nccl - rapids_build @@ -55,6 +57,7 @@ files: - test_libcuvs - depends_on_libcuvs - depends_on_libcuvs_tests + - depends_on_libnvjitlink-dev test_python: output: none includes: @@ -66,6 +69,7 @@ files: - depends_on_libcuvs - depends_on_cuvs - depends_on_cuvs_bench + - depends_on_libnvjitlink-dev checks: output: none includes: @@ -97,6 +101,7 @@ files: - rapids_build - rust - depends_on_libcuvs + - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_nccl go: @@ -111,6 +116,7 @@ files: - rapids_build - go - depends_on_libcuvs + - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_nccl java: @@ -119,6 +125,7 @@ files: - cuda - cuda_version - depends_on_libcuvs + - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_nccl - java @@ -137,6 +144,7 @@ files: table: tool.rapids-build-backend key: requires includes: + - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_librmm - depends_on_nccl @@ -148,6 +156,7 @@ files: table: project includes: - cuda_wheels + - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_librmm - depends_on_nccl @@ -169,6 
+178,7 @@ files: - build_py_cuvs - depends_on_cuda_python - depends_on_libcuvs + - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_librmm - rapids_build @@ -179,6 +189,7 @@ files: table: project includes: - depends_on_cuda_python + - depends_on_libnvjitlink-dev - depends_on_libcuvs - depends_on_pylibraft - run_py_cuvs @@ -329,20 +340,29 @@ dependencies: cuda: "12.*" use_cuda_wheels: "true" packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse]==12.* + - nvidia-cublas-cu12 + - nvidia-curand-cu12 + - nvidia-cusolver-cu12 + - nvidia-cusparse-cu12 - matrix: cuda: "13.*" use_cuda_wheels: "true" packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse]==13.* + - nvidia-cublas-cu13 + - nvidia-curand-cu13 + - nvidia-cusolver-cu13 + - nvidia-cusparse-cu13 - matrix: use_cuda_wheels: "false" packages: - # if no matching matrix selectors passed, list a range + # if no matching matrix selectors passed, list the unsuffixed packages # (just as a source of documentation, as this populates pyproject.toml in source control) - matrix: packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse]>=12,<14 + - nvidia-cublas + - nvidia-curand + - nvidia-cusolver + - nvidia-cusparse depends_on_cupy: common: - output_types: conda @@ -475,7 +495,7 @@ dependencies: - output_types: [conda, pyproject, requirements] packages: - click - - cuvs==25.12.*,>=0.0.0a0 + - cuvs==25.10.*,>=0.0.0a0 - pandas - pyyaml - requests @@ -502,17 +522,17 @@ dependencies: common: - output_types: conda packages: - - cuvs==25.12.*,>=0.0.0a0 + - cuvs==25.10.*,>=0.0.0a0 depends_on_cuvs_bench: common: - output_types: conda packages: - - cuvs-bench==25.12.*,>=0.0.0a0 + - cuvs-bench==25.10.*,>=0.0.0a0 depends_on_libcuvs: common: - output_types: conda packages: - - &libcuvs_unsuffixed libcuvs==25.12.*,>=0.0.0a0 + - &libcuvs_unsuffixed libcuvs==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -525,23 +545,42 @@ 
dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libcuvs-cu12==25.12.*,>=0.0.0a0 + - libcuvs-cu12==25.10.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - libcuvs-cu13==25.12.*,>=0.0.0a0 + - libcuvs-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*libcuvs_unsuffixed]} depends_on_libcuvs_tests: common: - output_types: conda packages: - - libcuvs-tests==25.12.*,>=0.0.0a0 + - libcuvs-tests==25.10.*,>=0.0.0a0 + depends_on_libnvjitlink-dev: + common: + - output_types: conda + packages: + - libnvjitlink-dev + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - nvidia-nvjitlink-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - nvidia-nvjitlink-cu13==25.10.*,>=0.0.0a0 + - {matrix: null, packages: [libnvjitlink-dev]} depends_on_libraft: common: - output_types: conda packages: - - &libraft_unsuffixed libraft==25.12.*,>=0.0.0a0 + - &libraft_unsuffixed libraft==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -554,18 +593,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libraft-cu12==25.12.*,>=0.0.0a0 + - libraft-cu12==25.10.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - libraft-cu13==25.12.*,>=0.0.0a0 + - libraft-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*libraft_unsuffixed]} depends_on_librmm: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==25.12.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -578,18 +617,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==25.12.*,>=0.0.0a0 + - librmm-cu12==25.10.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - librmm-cu13==25.12.*,>=0.0.0a0 
+ - librmm-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*librmm_unsuffixed]} depends_on_pylibraft: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==25.12.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -602,12 +641,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==25.12.*,>=0.0.0a0 + - pylibraft-cu12==25.10.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - pylibraft-cu13==25.12.*,>=0.0.0a0 + - pylibraft-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*pylibraft_unsuffixed]} depends_on_nccl: common: diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 98c097fc2e..b85d6bd680 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -32,9 +32,10 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cuda-python>=13.0.1,<14.0a0", - "libcuvs==25.12.*,>=0.0.0a0", + "libcuvs==25.10.*,>=0.0.0a0", + "libnvjitlink-dev", "numpy>=1.23,<3.0a0", - "pylibraft==25.12.*,>=0.0.0a0", + "pylibraft==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -119,9 +120,10 @@ requires = [ "cmake>=3.30.4", "cuda-python>=13.0.1,<14.0a0", "cython>=3.0.0", - "libcuvs==25.12.*,>=0.0.0a0", - "libraft==25.12.*,>=0.0.0a0", - "librmm==25.12.*,>=0.0.0a0", + "libcuvs==25.10.*,>=0.0.0a0", + "libnvjitlink-dev", + "libraft==25.10.*,>=0.0.0a0", + "librmm==25.10.*,>=0.0.0a0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
build-backend = "scikit_build_core.build" diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 15795d2751..0e052c46e1 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -19,7 +19,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "click", - "cuvs==25.12.*,>=0.0.0a0", + "cuvs==25.10.*,>=0.0.0a0", "matplotlib>=3.9", "pandas", "pyyaml", diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml index 09ad1f8778..e55d355d40 100644 --- a/python/libcuvs/pyproject.toml +++ b/python/libcuvs/pyproject.toml @@ -30,9 +30,13 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ - "cuda-toolkit[cublas,curand,cusolver,cusparse]>=12,<14", - "libraft==25.12.*,>=0.0.0a0", - "librmm==25.12.*,>=0.0.0a0", + "libnvjitlink-dev", + "libraft==25.10.*,>=0.0.0a0", + "librmm==25.10.*,>=0.0.0a0", + "nvidia-cublas", + "nvidia-curand", + "nvidia-cusolver", + "nvidia-cusparse", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -90,8 +94,9 @@ regex = "(?P.*)" build-backend = "scikit_build_core.build" requires = [ "cmake>=3.30.4", - "libraft==25.12.*,>=0.0.0a0", - "librmm==25.12.*,>=0.0.0a0", + "libnvjitlink-dev", + "libraft==25.10.*,>=0.0.0a0", + "librmm==25.10.*,>=0.0.0a0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
dependencies-file = "../../dependencies.yaml" From 45da4aaf055d9143cb805d4ad47e4dc23d9aef4f Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Oct 2025 18:40:54 +0000 Subject: [PATCH 002/158] fix dependencies.yaml --- .../all_cuda-129_arch-aarch64.yaml | 4 +- .../all_cuda-129_arch-x86_64.yaml | 4 +- .../all_cuda-130_arch-aarch64.yaml | 4 +- .../all_cuda-130_arch-x86_64.yaml | 4 +- .../bench_ann_cuda-129_arch-aarch64.yaml | 8 +-- .../bench_ann_cuda-129_arch-x86_64.yaml | 8 +-- .../bench_ann_cuda-130_arch-aarch64.yaml | 8 +-- .../bench_ann_cuda-130_arch-x86_64.yaml | 8 +-- .../go_cuda-129_arch-aarch64.yaml | 4 +- .../environments/go_cuda-129_arch-x86_64.yaml | 4 +- .../go_cuda-130_arch-aarch64.yaml | 4 +- .../environments/go_cuda-130_arch-x86_64.yaml | 4 +- .../rust_cuda-129_arch-aarch64.yaml | 4 +- .../rust_cuda-129_arch-x86_64.yaml | 4 +- .../rust_cuda-130_arch-aarch64.yaml | 4 +- .../rust_cuda-130_arch-x86_64.yaml | 4 +- dependencies.yaml | 51 ++++++++----------- python/cuvs/pyproject.toml | 10 ++-- python/cuvs_bench/pyproject.toml | 2 +- python/libcuvs/pyproject.toml | 13 ++--- 20 files changed, 72 insertions(+), 84 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 257c91c8b4..c7d0d8b659 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libnvjitlink-dev -- librmm==25.10.*,>=0.0.0a0 +- librmm==25.12.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -40,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==25.10.*,>=0.0.0a0 +- pylibraft==25.12.*,>=0.0.0a0 - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0.dev0 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index ec6d0fb958..0969780718 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ 
b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libnvjitlink-dev -- librmm==25.10.*,>=0.0.0a0 +- librmm==25.12.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -40,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==25.10.*,>=0.0.0a0 +- pylibraft==25.12.*,>=0.0.0a0 - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0.dev0 diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 7b802b2309..04ccd3496b 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libnvjitlink-dev -- librmm==25.10.*,>=0.0.0a0 +- librmm==25.12.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -40,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==25.10.*,>=0.0.0a0 +- pylibraft==25.12.*,>=0.0.0a0 - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0.dev0 diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 4d02d49918..1c5d09dda0 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libnvjitlink-dev -- librmm==25.10.*,>=0.0.0a0 +- librmm==25.12.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -40,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==25.10.*,>=0.0.0a0 +- pylibraft==25.12.*,>=0.0.0a0 - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0.dev0 diff --git a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml index 0126cd44b3..2d11678e7b 100644 --- a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml @@ -18,7 +18,7 @@ dependencies: - 
cuda-python>=12.9.2,<13.0a0 - cuda-version=12.9 - cupy>=13.6.0 -- cuvs==25.10.*,>=0.0.0a0 +- cuvs==25.12.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -30,16 +30,16 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- librmm==25.10.*,>=0.0.0a0 +- librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==25.10.*,>=0.0.0a0 +- pylibraft==25.12.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.4.0,<0.5.0.dev0 - requests diff --git a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml index 33aa1eaf78..82ca29dbdd 100644 --- a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=12.9.2,<13.0a0 - cuda-version=12.9 - cupy>=13.6.0 -- cuvs==25.10.*,>=0.0.0a0 +- cuvs==25.12.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -32,9 +32,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- librmm==25.10.*,>=0.0.0a0 +- librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - mkl-devel=2023 - nccl>=2.19 @@ -42,7 +42,7 @@ dependencies: - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==25.10.*,>=0.0.0a0 +- pylibraft==25.12.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.4.0,<0.5.0.dev0 - requests diff --git a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml index 639cffd860..45dc071a2f 100644 --- a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=13.0.1,<14.0a0 - cuda-version=13.0 - cupy>=13.6.0 -- cuvs==25.10.*,>=0.0.0a0 +- 
cuvs==25.12.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -30,16 +30,16 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- librmm==25.10.*,>=0.0.0a0 +- librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==25.10.*,>=0.0.0a0 +- pylibraft==25.12.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.4.0,<0.5.0.dev0 - requests diff --git a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml index 88fa65f162..70c32bf03f 100644 --- a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=13.0.1,<14.0a0 - cuda-version=13.0 - cupy>=13.6.0 -- cuvs==25.10.*,>=0.0.0a0 +- cuvs==25.12.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -32,9 +32,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- librmm==25.10.*,>=0.0.0a0 +- librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - mkl-devel=2023 - nccl>=2.19 @@ -42,7 +42,7 @@ dependencies: - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==25.10.*,>=0.0.0a0 +- pylibraft==25.12.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.4.0,<0.5.0.dev0 - requests diff --git a/conda/environments/go_cuda-129_arch-aarch64.yaml b/conda/environments/go_cuda-129_arch-aarch64.yaml index 2cd0bebe3a..45e8f94697 100644 --- a/conda/environments/go_cuda-129_arch-aarch64.yaml +++ b/conda/environments/go_cuda-129_arch-aarch64.yaml @@ -24,9 +24,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- libraft==25.10.*,>=0.0.0a0 +- libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja - 
sysroot_linux-aarch64==2.28 diff --git a/conda/environments/go_cuda-129_arch-x86_64.yaml b/conda/environments/go_cuda-129_arch-x86_64.yaml index c14dab9cbe..ce137edfce 100644 --- a/conda/environments/go_cuda-129_arch-x86_64.yaml +++ b/conda/environments/go_cuda-129_arch-x86_64.yaml @@ -24,9 +24,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- libraft==25.10.*,>=0.0.0a0 +- libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja - sysroot_linux-64==2.28 diff --git a/conda/environments/go_cuda-130_arch-aarch64.yaml b/conda/environments/go_cuda-130_arch-aarch64.yaml index 1edc9e74e7..c6fd84a0d3 100644 --- a/conda/environments/go_cuda-130_arch-aarch64.yaml +++ b/conda/environments/go_cuda-130_arch-aarch64.yaml @@ -24,9 +24,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- libraft==25.10.*,>=0.0.0a0 +- libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja - sysroot_linux-aarch64==2.28 diff --git a/conda/environments/go_cuda-130_arch-x86_64.yaml b/conda/environments/go_cuda-130_arch-x86_64.yaml index d4f5d1cd10..90bf0dc636 100644 --- a/conda/environments/go_cuda-130_arch-x86_64.yaml +++ b/conda/environments/go_cuda-130_arch-x86_64.yaml @@ -24,9 +24,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- libraft==25.10.*,>=0.0.0a0 +- libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja - sysroot_linux-64==2.28 diff --git a/conda/environments/rust_cuda-129_arch-aarch64.yaml b/conda/environments/rust_cuda-129_arch-aarch64.yaml index 01a95cc416..216ea42da4 100644 --- a/conda/environments/rust_cuda-129_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-129_arch-aarch64.yaml @@ -21,9 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- 
libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- libraft==25.10.*,>=0.0.0a0 +- libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-129_arch-x86_64.yaml b/conda/environments/rust_cuda-129_arch-x86_64.yaml index 4479cf8038..e9b8726a47 100644 --- a/conda/environments/rust_cuda-129_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-129_arch-x86_64.yaml @@ -21,9 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- libraft==25.10.*,>=0.0.0a0 +- libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-130_arch-aarch64.yaml b/conda/environments/rust_cuda-130_arch-aarch64.yaml index 4049a2c5e1..1c3317bbb6 100644 --- a/conda/environments/rust_cuda-130_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-130_arch-aarch64.yaml @@ -21,9 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- libraft==25.10.*,>=0.0.0a0 +- libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-130_arch-x86_64.yaml b/conda/environments/rust_cuda-130_arch-x86_64.yaml index 142033b23a..bb2d413b94 100644 --- a/conda/environments/rust_cuda-130_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-130_arch-x86_64.yaml @@ -21,9 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==25.10.*,>=0.0.0a0 +- libcuvs==25.12.*,>=0.0.0a0 - libnvjitlink-dev -- libraft==25.10.*,>=0.0.0a0 +- libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 - ninja diff --git a/dependencies.yaml b/dependencies.yaml index 4c665c7e24..b44828fe54 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -43,9 +43,9 @@ files: - cuda_version - depends_on_cuda_python - depends_on_cupy + - depends_on_libnvjitlink-dev - depends_on_pylibraft - depends_on_libcuvs - - 
depends_on_libnvjitlink-dev - depends_on_librmm - depends_on_nccl - rapids_build @@ -340,29 +340,20 @@ dependencies: cuda: "12.*" use_cuda_wheels: "true" packages: - - nvidia-cublas-cu12 - - nvidia-curand-cu12 - - nvidia-cusolver-cu12 - - nvidia-cusparse-cu12 + - cuda-toolkit[cublas,curand,cusolver,cusparse]==12.* - matrix: cuda: "13.*" use_cuda_wheels: "true" packages: - - nvidia-cublas-cu13 - - nvidia-curand-cu13 - - nvidia-cusolver-cu13 - - nvidia-cusparse-cu13 + - cuda-toolkit[cublas,curand,cusolver,cusparse]==13.* - matrix: use_cuda_wheels: "false" packages: - # if no matching matrix selectors passed, list the unsuffixed packages + # if no matching matrix selectors passed, list a range # (just as a source of documentation, as this populates pyproject.toml in source control) - matrix: packages: - - nvidia-cublas - - nvidia-curand - - nvidia-cusolver - - nvidia-cusparse + - cuda-toolkit[cublas,curand,cusolver,cusparse]>=12,<14 depends_on_cupy: common: - output_types: conda @@ -495,7 +486,7 @@ dependencies: - output_types: [conda, pyproject, requirements] packages: - click - - cuvs==25.10.*,>=0.0.0a0 + - cuvs==25.12.*,>=0.0.0a0 - pandas - pyyaml - requests @@ -522,17 +513,17 @@ dependencies: common: - output_types: conda packages: - - cuvs==25.10.*,>=0.0.0a0 + - cuvs==25.12.*,>=0.0.0a0 depends_on_cuvs_bench: common: - output_types: conda packages: - - cuvs-bench==25.10.*,>=0.0.0a0 + - cuvs-bench==25.12.*,>=0.0.0a0 depends_on_libcuvs: common: - output_types: conda packages: - - &libcuvs_unsuffixed libcuvs==25.10.*,>=0.0.0a0 + - &libcuvs_unsuffixed libcuvs==25.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -545,18 +536,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libcuvs-cu12==25.10.*,>=0.0.0a0 + - libcuvs-cu12==25.12.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - libcuvs-cu13==25.10.*,>=0.0.0a0 + - libcuvs-cu13==25.12.*,>=0.0.0a0 - 
{matrix: null, packages: [*libcuvs_unsuffixed]} depends_on_libcuvs_tests: common: - output_types: conda packages: - - libcuvs-tests==25.10.*,>=0.0.0a0 + - libcuvs-tests==25.12.*,>=0.0.0a0 depends_on_libnvjitlink-dev: common: - output_types: conda @@ -580,7 +571,7 @@ dependencies: common: - output_types: conda packages: - - &libraft_unsuffixed libraft==25.10.*,>=0.0.0a0 + - &libraft_unsuffixed libraft==25.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -593,18 +584,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libraft-cu12==25.10.*,>=0.0.0a0 + - libraft-cu12==25.12.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - libraft-cu13==25.10.*,>=0.0.0a0 + - libraft-cu13==25.12.*,>=0.0.0a0 - {matrix: null, packages: [*libraft_unsuffixed]} depends_on_librmm: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==25.10.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==25.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -617,18 +608,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==25.10.*,>=0.0.0a0 + - librmm-cu12==25.12.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - librmm-cu13==25.10.*,>=0.0.0a0 + - librmm-cu13==25.12.*,>=0.0.0a0 - {matrix: null, packages: [*librmm_unsuffixed]} depends_on_pylibraft: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==25.10.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==25.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -641,12 +632,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==25.10.*,>=0.0.0a0 + - pylibraft-cu12==25.12.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - 
pylibraft-cu13==25.10.*,>=0.0.0a0 + - pylibraft-cu13==25.12.*,>=0.0.0a0 - {matrix: null, packages: [*pylibraft_unsuffixed]} depends_on_nccl: common: diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index b85d6bd680..2c12a12a07 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -32,10 +32,10 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cuda-python>=13.0.1,<14.0a0", - "libcuvs==25.10.*,>=0.0.0a0", + "libcuvs==25.12.*,>=0.0.0a0", "libnvjitlink-dev", "numpy>=1.23,<3.0a0", - "pylibraft==25.10.*,>=0.0.0a0", + "pylibraft==25.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -120,10 +120,10 @@ requires = [ "cmake>=3.30.4", "cuda-python>=13.0.1,<14.0a0", "cython>=3.0.0", - "libcuvs==25.10.*,>=0.0.0a0", + "libcuvs==25.12.*,>=0.0.0a0", "libnvjitlink-dev", - "libraft==25.10.*,>=0.0.0a0", - "librmm==25.10.*,>=0.0.0a0", + "libraft==25.12.*,>=0.0.0a0", + "librmm==25.12.*,>=0.0.0a0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
build-backend = "scikit_build_core.build" diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 0e052c46e1..15795d2751 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -19,7 +19,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "click", - "cuvs==25.10.*,>=0.0.0a0", + "cuvs==25.12.*,>=0.0.0a0", "matplotlib>=3.9", "pandas", "pyyaml", diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml index e55d355d40..679a3e4d09 100644 --- a/python/libcuvs/pyproject.toml +++ b/python/libcuvs/pyproject.toml @@ -30,13 +30,10 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ + "cuda-toolkit[cublas,curand,cusolver,cusparse]>=12,<14", "libnvjitlink-dev", - "libraft==25.10.*,>=0.0.0a0", - "librmm==25.10.*,>=0.0.0a0", - "nvidia-cublas", - "nvidia-curand", - "nvidia-cusolver", - "nvidia-cusparse", + "libraft==25.12.*,>=0.0.0a0", + "librmm==25.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -95,8 +92,8 @@ build-backend = "scikit_build_core.build" requires = [ "cmake>=3.30.4", "libnvjitlink-dev", - "libraft==25.10.*,>=0.0.0a0", - "librmm==25.10.*,>=0.0.0a0", + "libraft==25.12.*,>=0.0.0a0", + "librmm==25.12.*,>=0.0.0a0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
dependencies-file = "../../dependencies.yaml" From a7c8621e0fa8347b6c9c5d69e8ae9f558abcf786 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 4 Oct 2025 07:11:30 +0000 Subject: [PATCH 003/158] generate files at build time, use tags to avoid compilation of types --- .gitignore | 4 + cpp/CMakeLists.txt | 34 +- .../interleaved_scan.cmake | 1300 ----------------- .../generate_interleaved_scan_kernels.cmake | 68 + .../ivf_flat/ivf_flat_interleaved_scan.cuh | 191 ++- .../ivf_flat/jit_lto_kernels/README.md | 45 +- .../jit_lto_kernels/generate_kernels.py | 154 +- ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...l_0_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...l_0_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ..._0_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ..._0_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...el_0_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...el_0_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...l_0_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - 
...l_0_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._0_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_0_1_false_false_f_f_l_b_inner_1_id.cu | 71 - ..._0_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_0_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ..._0_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_0_1_false_false_h_h_l_b_inner_1_id.cu | 71 - ..._0_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_0_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...0_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...nel_0_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - ...0_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...nel_0_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...el_0_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...el_0_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - ..._0_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ..._0_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ..._0_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - ..._0_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...0_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...0_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - 
...l_0_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_0_1_true_false_f_f_l_b_inner_1_id.cu | 71 - ...l_0_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_0_1_true_false_f_f_l_n_inner_1_id.cu | 71 - ...l_0_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_0_1_true_false_h_h_l_b_inner_1_id.cu | 71 - ...l_0_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_0_1_true_false_h_h_l_n_inner_1_id.cu | 71 - ..._0_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_0_1_true_false_sc_i_l_b_inner_1_id.cu | 71 - ..._0_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_0_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...0_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...nel_0_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...0_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...nel_0_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ...l_0_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ...l_0_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - ...l_0_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ...l_0_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ..._0_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - ..._0_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...0_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...0_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - ..._0_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...rnel_0_4_false_false_f_f_l_b_inner_4_id.cu | 71 - ..._0_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - 
...rnel_0_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ..._0_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ..._0_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - ...l_0_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...ernel_0_4_true_false_f_f_l_b_inner_4_id.cu | 71 - ...l_0_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...ernel_0_4_true_false_f_f_l_n_inner_4_id.cu | 71 - ...l_0_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ...l_0_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ..._0_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...rnel_0_8_false_false_h_h_l_b_inner_8_id.cu | 71 - ..._0_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...rnel_0_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ..._0_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ..._0_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ...l_0_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - ...0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...ernel_0_8_true_false_h_h_l_b_inner_8_id.cu | 71 - ...l_0_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - ...0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...ernel_0_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ...l_0_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - ...l_0_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...128_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...128_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...28_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - 
...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...28_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ..._128_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ..._128_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...128_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...128_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ...28_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...el_128_1_false_false_f_f_l_b_inner_1_id.cu | 72 - ...28_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...el_128_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ...28_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...el_128_1_false_false_h_h_l_b_inner_1_id.cu | 72 - ...28_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...el_128_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...8_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...l_128_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - 
...8_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...l_128_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ..._128_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ..._128_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - ...28_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ...28_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ...28_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - ...28_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...8_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...8_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...128_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...nel_128_1_true_false_f_f_l_b_inner_1_id.cu | 72 - ...128_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...nel_128_1_true_false_f_f_l_n_inner_1_id.cu | 71 - ...128_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...nel_128_1_true_false_h_h_l_b_inner_1_id.cu | 72 - ...128_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...nel_128_1_true_false_h_h_l_n_inner_1_id.cu | 71 - ...28_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...el_128_1_true_false_sc_i_l_b_inner_1_id.cu | 72 - ...28_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...el_128_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...8_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - 
...l_128_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...8_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...l_128_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ...128_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ...128_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - ...128_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ...128_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ...28_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - ...28_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...8_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...8_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...28_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...el_128_4_false_false_f_f_l_b_inner_4_id.cu | 72 - ...28_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...el_128_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ...28_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ...28_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - ...128_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...nel_128_4_true_false_f_f_l_b_inner_4_id.cu | 72 - ...128_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...nel_128_4_true_false_f_f_l_n_inner_4_id.cu | 71 - ...128_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ...128_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ...28_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...el_128_8_false_false_h_h_l_b_inner_8_id.cu | 72 - ...28_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...el_128_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ...28_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ...28_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ...128_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - 
...8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...nel_128_8_true_false_h_h_l_b_inner_8_id.cu | 72 - ...128_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - ...8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...nel_128_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ...128_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - ...128_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ..._16_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ..._16_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...16_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...16_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...l_16_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...l_16_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ..._16_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ..._16_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - 
...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ...16_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...nel_16_1_false_false_f_f_l_b_inner_1_id.cu | 72 - ...16_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...nel_16_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ...16_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...nel_16_1_false_false_h_h_l_b_inner_1_id.cu | 72 - ...16_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...nel_16_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...6_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...el_16_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - ...6_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...el_16_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...l_16_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...l_16_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - ...16_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ...16_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ...16_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - ...16_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...6_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...6_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - ..._16_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...6_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_16_1_true_false_f_f_l_b_inner_1_id.cu | 71 - 
..._16_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...6_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_16_1_true_false_f_f_l_n_inner_1_id.cu | 71 - ..._16_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...6_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_16_1_true_false_h_h_l_b_inner_1_id.cu | 71 - ..._16_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...6_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_16_1_true_false_h_h_l_n_inner_1_id.cu | 71 - ...16_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...nel_16_1_true_false_sc_i_l_b_inner_1_id.cu | 72 - ...16_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...nel_16_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...6_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...el_16_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...6_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...el_16_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ..._16_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ..._16_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - ..._16_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ..._16_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ...16_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - ...16_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...6_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...6_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...16_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...nel_16_4_false_false_f_f_l_b_inner_4_id.cu | 72 - ...16_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...nel_16_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ...16_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ...16_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - 
..._16_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...6_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...rnel_16_4_true_false_f_f_l_b_inner_4_id.cu | 71 - ..._16_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...6_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...rnel_16_4_true_false_f_f_l_n_inner_4_id.cu | 71 - ..._16_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ..._16_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ...16_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...nel_16_8_false_false_h_h_l_b_inner_8_id.cu | 72 - ...16_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...nel_16_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ...16_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ...16_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ..._16_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - ...6_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...rnel_16_8_true_false_h_h_l_b_inner_8_id.cu | 71 - ..._16_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - ...6_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...rnel_16_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ..._16_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - ..._16_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...l_1_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...l_1_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ..._1_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ..._1_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - 
...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...el_1_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...el_1_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...l_1_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...l_1_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._1_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_1_1_false_false_f_f_l_b_inner_1_id.cu | 71 - ..._1_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_1_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ..._1_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_1_1_false_false_h_h_l_b_inner_1_id.cu | 71 - ..._1_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_1_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...1_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...nel_1_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - ...1_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...nel_1_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - 
..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...el_1_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...el_1_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - ..._1_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ..._1_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ..._1_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - ..._1_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...1_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...1_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...l_1_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_1_1_true_false_f_f_l_b_inner_1_id.cu | 71 - ...l_1_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_1_1_true_false_f_f_l_n_inner_1_id.cu | 71 - ...l_1_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_1_1_true_false_h_h_l_b_inner_1_id.cu | 71 - ...l_1_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_1_1_true_false_h_h_l_n_inner_1_id.cu | 71 - ..._1_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_1_1_true_false_sc_i_l_b_inner_1_id.cu | 71 - ..._1_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_1_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...1_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...nel_1_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...1_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - 
...nel_1_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ...l_1_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ...l_1_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - ...l_1_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ...l_1_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ..._1_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - ..._1_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...1_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...1_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - ..._1_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...rnel_1_4_false_false_f_f_l_b_inner_4_id.cu | 71 - ..._1_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...rnel_1_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ..._1_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ..._1_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - ...l_1_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...ernel_1_4_true_false_f_f_l_b_inner_4_id.cu | 71 - ...l_1_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...ernel_1_4_true_false_f_f_l_n_inner_4_id.cu | 71 - ...l_1_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ...l_1_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ..._1_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...rnel_1_8_false_false_h_h_l_b_inner_8_id.cu | 71 - ..._1_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...rnel_1_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ..._1_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ..._1_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ...l_1_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - ...1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...ernel_1_8_true_false_h_h_l_b_inner_8_id.cu | 71 - ...l_1_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - 
...1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...ernel_1_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ...l_1_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - ...l_1_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...256_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...256_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...56_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...56_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ..._256_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ..._256_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...256_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...256_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ...56_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - 
..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...el_256_1_false_false_f_f_l_b_inner_1_id.cu | 72 - ...56_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...el_256_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ...56_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...el_256_1_false_false_h_h_l_b_inner_1_id.cu | 72 - ...56_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...el_256_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...6_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...l_256_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - ...6_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...l_256_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ..._256_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ..._256_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - ...56_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ...56_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ...56_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - ...56_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...6_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...6_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...256_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...6_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...nel_256_1_true_false_f_f_l_b_inner_1_id.cu | 72 - ...256_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...6_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...nel_256_1_true_false_f_f_l_n_inner_1_id.cu | 71 - 
...256_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...6_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...nel_256_1_true_false_h_h_l_b_inner_1_id.cu | 72 - ...256_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...6_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...nel_256_1_true_false_h_h_l_n_inner_1_id.cu | 71 - ...56_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...el_256_1_true_false_sc_i_l_b_inner_1_id.cu | 72 - ...56_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...el_256_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...6_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...l_256_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...6_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...l_256_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ...256_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ...256_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - ...256_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ...256_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ...56_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - ...56_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...6_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...6_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...56_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...el_256_4_false_false_f_f_l_b_inner_4_id.cu | 72 - ...56_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...el_256_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ...56_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ...56_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - ...256_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...6_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...nel_256_4_true_false_f_f_l_b_inner_4_id.cu | 72 - 
...256_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...6_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...nel_256_4_true_false_f_f_l_n_inner_4_id.cu | 71 - ...256_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ...256_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ...56_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...el_256_8_false_false_h_h_l_b_inner_8_id.cu | 72 - ...56_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...el_256_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ...56_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ...56_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ...256_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - ...6_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...nel_256_8_true_false_h_h_l_b_inner_8_id.cu | 72 - ...256_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - ...6_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...nel_256_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ...256_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - ...256_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...l_2_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...l_2_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ..._2_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ..._2_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - 
..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...el_2_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...el_2_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...l_2_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...l_2_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._2_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_2_1_false_false_f_f_l_b_inner_1_id.cu | 71 - ..._2_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_2_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ..._2_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_2_1_false_false_h_h_l_b_inner_1_id.cu | 71 - ..._2_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_2_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...2_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...nel_2_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - ...2_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...nel_2_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...el_2_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - 
..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...el_2_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - ..._2_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ..._2_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ..._2_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - ..._2_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...2_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...2_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...l_2_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_2_1_true_false_f_f_l_b_inner_1_id.cu | 71 - ...l_2_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_2_1_true_false_f_f_l_n_inner_1_id.cu | 71 - ...l_2_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_2_1_true_false_h_h_l_b_inner_1_id.cu | 71 - ...l_2_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_2_1_true_false_h_h_l_n_inner_1_id.cu | 71 - ..._2_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_2_1_true_false_sc_i_l_b_inner_1_id.cu | 71 - ..._2_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_2_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...2_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...nel_2_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...2_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...nel_2_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ...l_2_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ...l_2_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - 
...l_2_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ...l_2_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ..._2_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - ..._2_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...2_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...2_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - ..._2_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...rnel_2_4_false_false_f_f_l_b_inner_4_id.cu | 71 - ..._2_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...rnel_2_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ..._2_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ..._2_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - ...l_2_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...ernel_2_4_true_false_f_f_l_b_inner_4_id.cu | 71 - ...l_2_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...ernel_2_4_true_false_f_f_l_n_inner_4_id.cu | 71 - ...l_2_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ...l_2_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ..._2_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...rnel_2_8_false_false_h_h_l_b_inner_8_id.cu | 71 - ..._2_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...rnel_2_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ..._2_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ..._2_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ...l_2_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - ...2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...ernel_2_8_true_false_h_h_l_b_inner_8_id.cu | 71 - ...l_2_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - ...2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...ernel_2_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ...l_2_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - 
...l_2_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ..._32_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ..._32_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...32_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...32_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...l_32_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...l_32_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ..._32_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ..._32_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ...32_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...nel_32_1_false_false_f_f_l_b_inner_1_id.cu | 72 - ...32_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - 
..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...nel_32_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ...32_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...nel_32_1_false_false_h_h_l_b_inner_1_id.cu | 72 - ...32_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...nel_32_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...2_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...el_32_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - ...2_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...el_32_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...l_32_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...l_32_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - ...32_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ...32_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ...32_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - ...32_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...2_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...2_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - ..._32_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_32_1_true_false_f_f_l_b_inner_1_id.cu | 71 - ..._32_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_32_1_true_false_f_f_l_n_inner_1_id.cu | 71 - ..._32_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_32_1_true_false_h_h_l_b_inner_1_id.cu | 71 - 
..._32_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_32_1_true_false_h_h_l_n_inner_1_id.cu | 71 - ...32_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...nel_32_1_true_false_sc_i_l_b_inner_1_id.cu | 72 - ...32_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...nel_32_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...2_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...el_32_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...2_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...el_32_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ..._32_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ..._32_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - ..._32_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ..._32_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ...32_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - ...32_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...2_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...2_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...32_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...nel_32_4_false_false_f_f_l_b_inner_4_id.cu | 72 - ...32_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...nel_32_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ...32_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ...32_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - ..._32_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...rnel_32_4_true_false_f_f_l_b_inner_4_id.cu | 71 - ..._32_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...rnel_32_4_true_false_f_f_l_n_inner_4_id.cu | 71 - 
..._32_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ..._32_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ...32_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...nel_32_8_false_false_h_h_l_b_inner_8_id.cu | 72 - ...32_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...nel_32_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ...32_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ...32_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ..._32_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - ...2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...rnel_32_8_true_false_h_h_l_b_inner_8_id.cu | 71 - ..._32_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - ...2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...rnel_32_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ..._32_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - ..._32_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...l_4_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...l_4_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ..._4_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ..._4_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...el_4_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - 
..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...el_4_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...l_4_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...l_4_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._4_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_4_1_false_false_f_f_l_b_inner_1_id.cu | 71 - ..._4_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_4_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ..._4_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_4_1_false_false_h_h_l_b_inner_1_id.cu | 71 - ..._4_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_4_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...4_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...nel_4_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - ...4_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...nel_4_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...el_4_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...el_4_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - 
..._4_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ..._4_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ..._4_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - ..._4_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...4_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...4_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...l_4_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_4_1_true_false_f_f_l_b_inner_1_id.cu | 71 - ...l_4_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_4_1_true_false_f_f_l_n_inner_1_id.cu | 71 - ...l_4_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_4_1_true_false_h_h_l_b_inner_1_id.cu | 71 - ...l_4_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_4_1_true_false_h_h_l_n_inner_1_id.cu | 71 - ..._4_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_4_1_true_false_sc_i_l_b_inner_1_id.cu | 71 - ..._4_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_4_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...4_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...nel_4_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...4_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...nel_4_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ...l_4_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ...l_4_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - ...l_4_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ...l_4_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ..._4_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - 
..._4_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...4_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...4_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - ..._4_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...rnel_4_4_false_false_f_f_l_b_inner_4_id.cu | 71 - ..._4_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...rnel_4_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ..._4_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ..._4_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - ...l_4_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...ernel_4_4_true_false_f_f_l_b_inner_4_id.cu | 71 - ...l_4_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...ernel_4_4_true_false_f_f_l_n_inner_4_id.cu | 71 - ...l_4_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ...l_4_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ..._4_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...rnel_4_8_false_false_h_h_l_b_inner_8_id.cu | 71 - ..._4_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...rnel_4_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ..._4_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ..._4_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ...l_4_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - ...4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...ernel_4_8_true_false_h_h_l_b_inner_8_id.cu | 71 - ...l_4_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - ...4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...ernel_4_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ...l_4_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - ...l_4_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - 
..._64_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ..._64_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...64_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...64_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...l_64_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...l_64_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - ...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ..._64_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ..._64_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ...64_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...nel_64_1_false_false_f_f_l_b_inner_1_id.cu | 72 - ...64_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...nel_64_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ...64_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - 
..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...nel_64_1_false_false_h_h_l_b_inner_1_id.cu | 72 - ...64_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...nel_64_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...4_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...el_64_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - ...4_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...el_64_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...l_64_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...l_64_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - ...64_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ...64_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ...64_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - ...64_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...4_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...4_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - ..._64_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_64_1_true_false_f_f_l_b_inner_1_id.cu | 71 - ..._64_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_64_1_true_false_f_f_l_n_inner_1_id.cu | 71 - ..._64_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_64_1_true_false_h_h_l_b_inner_1_id.cu | 71 - ..._64_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_64_1_true_false_h_h_l_n_inner_1_id.cu | 71 - 
...64_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...nel_64_1_true_false_sc_i_l_b_inner_1_id.cu | 72 - ...64_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...nel_64_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...4_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...el_64_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...4_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...el_64_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ..._64_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ..._64_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - ..._64_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ..._64_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ...64_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - ...64_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...4_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...4_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...64_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...nel_64_4_false_false_f_f_l_b_inner_4_id.cu | 72 - ...64_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...nel_64_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ...64_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ...64_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - ..._64_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...rnel_64_4_true_false_f_f_l_b_inner_4_id.cu | 71 - ..._64_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...rnel_64_4_true_false_f_f_l_n_inner_4_id.cu | 71 - ..._64_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ..._64_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ...64_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - 
..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...nel_64_8_false_false_h_h_l_b_inner_8_id.cu | 72 - ...64_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...nel_64_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ...64_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ...64_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ..._64_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - ...4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...rnel_64_8_true_false_h_h_l_b_inner_8_id.cu | 71 - ..._64_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - ...4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...rnel_64_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ..._64_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - ..._64_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - ...16_false_false_sc_i_l_b_euclidean_16_id.cu | 72 - ..._false_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...l_8_16_false_false_sc_i_l_b_inner_16_id.cu | 72 - ...16_false_false_sc_i_l_n_euclidean_16_id.cu | 71 - ..._false_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...l_8_16_false_false_sc_i_l_n_inner_16_id.cu | 71 - ...6_false_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ..._8_16_false_false_uc_ui_l_b_inner_16_id.cu | 72 - ...6_false_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ...false_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ..._8_16_false_false_uc_ui_l_n_inner_16_id.cu | 72 - ...16_false_true_sc_i_l_b_inner_16_compose.cu | 75 - ...16_false_true_sc_i_l_n_inner_16_compose.cu | 74 - ...6_false_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...6_false_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._16_true_false_sc_i_l_b_euclidean_16_id.cu | 72 - ...6_true_false_sc_i_l_b_euclidean_16_sqrt.cu | 72 - ...el_8_16_true_false_sc_i_l_b_inner_16_id.cu | 72 - ..._16_true_false_sc_i_l_n_euclidean_16_id.cu | 71 - ...6_true_false_sc_i_l_n_euclidean_16_sqrt.cu | 71 - ...el_8_16_true_false_sc_i_l_n_inner_16_id.cu | 71 - 
...16_true_false_uc_ui_l_b_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_b_euclidean_16_sqrt.cu | 72 - ...l_8_16_true_false_uc_ui_l_b_inner_16_id.cu | 72 - ...16_true_false_uc_ui_l_n_euclidean_16_id.cu | 72 - ..._true_false_uc_ui_l_n_euclidean_16_sqrt.cu | 72 - ...l_8_16_true_false_uc_ui_l_n_inner_16_id.cu | 72 - ..._16_true_true_sc_i_l_b_inner_16_compose.cu | 75 - ..._16_true_true_sc_i_l_n_inner_16_compose.cu | 74 - ...16_true_true_uc_ui_l_b_inner_16_compose.cu | 75 - ...16_true_true_uc_ui_l_n_inner_16_compose.cu | 75 - ..._8_1_false_false_f_f_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_8_1_false_false_f_f_l_b_inner_1_id.cu | 71 - ..._8_1_false_false_f_f_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_8_1_false_false_f_f_l_n_inner_1_id.cu | 71 - ..._8_1_false_false_h_h_l_b_euclidean_1_id.cu | 72 - ..._1_false_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_8_1_false_false_h_h_l_b_inner_1_id.cu | 71 - ..._8_1_false_false_h_h_l_n_euclidean_1_id.cu | 71 - ..._1_false_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_8_1_false_false_h_h_l_n_inner_1_id.cu | 71 - ...8_1_false_false_sc_i_l_b_euclidean_1_id.cu | 72 - ...1_false_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...nel_8_1_false_false_sc_i_l_b_inner_1_id.cu | 72 - ...8_1_false_false_sc_i_l_n_euclidean_1_id.cu | 71 - ...1_false_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...nel_8_1_false_false_sc_i_l_n_inner_1_id.cu | 71 - ..._1_false_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...el_8_1_false_false_uc_ui_l_b_inner_1_id.cu | 72 - ..._1_false_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ..._false_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...el_8_1_false_false_uc_ui_l_n_inner_1_id.cu | 72 - ..._8_1_false_true_f_f_l_b_inner_1_compose.cu | 75 - ..._8_1_false_true_f_f_l_n_inner_1_compose.cu | 74 - ..._8_1_false_true_h_h_l_b_inner_1_compose.cu | 75 - 
..._8_1_false_true_h_h_l_n_inner_1_compose.cu | 74 - ...8_1_false_true_sc_i_l_b_inner_1_compose.cu | 75 - ...8_1_false_true_sc_i_l_n_inner_1_compose.cu | 74 - ..._1_false_true_uc_ui_l_b_inner_1_compose.cu | 75 - ..._1_false_true_uc_ui_l_n_inner_1_compose.cu | 75 - ...l_8_1_true_false_f_f_l_b_euclidean_1_id.cu | 72 - ...8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_8_1_true_false_f_f_l_b_inner_1_id.cu | 71 - ...l_8_1_true_false_f_f_l_n_euclidean_1_id.cu | 71 - ...8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_8_1_true_false_f_f_l_n_inner_1_id.cu | 71 - ...l_8_1_true_false_h_h_l_b_euclidean_1_id.cu | 72 - ...8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu | 72 - ...ernel_8_1_true_false_h_h_l_b_inner_1_id.cu | 71 - ...l_8_1_true_false_h_h_l_n_euclidean_1_id.cu | 71 - ...8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu | 71 - ...ernel_8_1_true_false_h_h_l_n_inner_1_id.cu | 71 - ..._8_1_true_false_sc_i_l_b_euclidean_1_id.cu | 72 - ..._1_true_false_sc_i_l_b_euclidean_1_sqrt.cu | 72 - ...rnel_8_1_true_false_sc_i_l_b_inner_1_id.cu | 71 - ..._8_1_true_false_sc_i_l_n_euclidean_1_id.cu | 71 - ..._1_true_false_sc_i_l_n_euclidean_1_sqrt.cu | 71 - ...rnel_8_1_true_false_sc_i_l_n_inner_1_id.cu | 71 - ...8_1_true_false_uc_ui_l_b_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu | 72 - ...nel_8_1_true_false_uc_ui_l_b_inner_1_id.cu | 72 - ...8_1_true_false_uc_ui_l_n_euclidean_1_id.cu | 72 - ...1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu | 72 - ...nel_8_1_true_false_uc_ui_l_n_inner_1_id.cu | 72 - ...l_8_1_true_true_f_f_l_b_inner_1_compose.cu | 75 - ...l_8_1_true_true_f_f_l_n_inner_1_compose.cu | 74 - ...l_8_1_true_true_h_h_l_b_inner_1_compose.cu | 75 - ...l_8_1_true_true_h_h_l_n_inner_1_compose.cu | 74 - ..._8_1_true_true_sc_i_l_b_inner_1_compose.cu | 75 - ..._8_1_true_true_sc_i_l_n_inner_1_compose.cu | 74 - ...8_1_true_true_uc_ui_l_b_inner_1_compose.cu | 75 - ...8_1_true_true_uc_ui_l_n_inner_1_compose.cu | 75 - 
..._8_4_false_false_f_f_l_b_euclidean_4_id.cu | 72 - ..._4_false_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...rnel_8_4_false_false_f_f_l_b_inner_4_id.cu | 71 - ..._8_4_false_false_f_f_l_n_euclidean_4_id.cu | 71 - ..._4_false_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...rnel_8_4_false_false_f_f_l_n_inner_4_id.cu | 71 - ..._8_4_false_true_f_f_l_b_inner_4_compose.cu | 75 - ..._8_4_false_true_f_f_l_n_inner_4_compose.cu | 74 - ...l_8_4_true_false_f_f_l_b_euclidean_4_id.cu | 72 - ...8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu | 72 - ...ernel_8_4_true_false_f_f_l_b_inner_4_id.cu | 71 - ...l_8_4_true_false_f_f_l_n_euclidean_4_id.cu | 71 - ...8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu | 71 - ...ernel_8_4_true_false_f_f_l_n_inner_4_id.cu | 71 - ...l_8_4_true_true_f_f_l_b_inner_4_compose.cu | 75 - ...l_8_4_true_true_f_f_l_n_inner_4_compose.cu | 74 - ..._8_8_false_false_h_h_l_b_euclidean_8_id.cu | 72 - ..._8_false_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...rnel_8_8_false_false_h_h_l_b_inner_8_id.cu | 71 - ..._8_8_false_false_h_h_l_n_euclidean_8_id.cu | 71 - ..._8_false_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...rnel_8_8_false_false_h_h_l_n_inner_8_id.cu | 71 - ..._8_8_false_true_h_h_l_b_inner_8_compose.cu | 75 - ..._8_8_false_true_h_h_l_n_inner_8_compose.cu | 74 - ...l_8_8_true_false_h_h_l_b_euclidean_8_id.cu | 72 - ...8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu | 72 - ...ernel_8_8_true_false_h_h_l_b_inner_8_id.cu | 71 - ...l_8_8_true_false_h_h_l_n_euclidean_8_id.cu | 71 - ...8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu | 71 - ...ernel_8_8_true_false_h_h_l_n_inner_8_id.cu | 71 - ...l_8_8_true_true_h_h_l_b_inner_8_compose.cu | 75 - ...l_8_8_true_true_h_h_l_n_inner_8_compose.cu | 74 - .../interleaved_scan_planner.hpp | 2 +- .../jit_lto_kernels/interleaved_scan_tags.hpp | 56 + 1289 files changed, 444 insertions(+), 93993 deletions(-) delete mode 100644 cpp/cmake/jit_lto_kernels_list/interleaved_scan.cmake create mode 100644 cpp/cmake/modules/generate_interleaved_scan_kernels.cmake 
delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp diff --git a/.gitignore b/.gitignore index 3627558ff5..198f847982 100644 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,7 @@ ivf_pq_index # java .classpath + +# jit lto kernels +cpp/cmake/jit_lto_kernels_list/ +cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_*.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8d176a595e..966c2c3349 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -217,7 +217,10 @@ endif() # this is needed to embed fatbins to JIT at runtime include(cmake/modules/embed_fatbins.cmake) -include(cmake/jit_lto_kernels_list/interleaved_scan.cmake) + +# Generate interleaved scan kernel files at build time +include(cmake/modules/generate_interleaved_scan_kernels.cmake) +generate_interleaved_scan_kernels() # ################################################################################################## # * cuvs --------------------------------------------------------------------- @@ -572,8 +575,13 @@ if(NOT BUILD_CPU_ONLY) ${INTERLEAVED_SCAN_KERNEL_FILES} ) + # Make sure the kernels are generated before we try to build them + add_dependencies(jit_lto_fatbins ${INTERLEAVED_SCAN_KERNELS_TARGET}) + target_compile_definitions(jit_lto_fatbins PRIVATE BUILD_KERNEL) - target_include_directories(jit_lto_fatbins PRIVATE "$") + 
target_include_directories(jit_lto_fatbins PRIVATE + "$" + ) target_compile_options(jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size) target_compile_options( jit_lto_fatbins PRIVATE "$<$:${CUVS_CXX_FLAGS}>" @@ -594,12 +602,24 @@ if(NOT BUILD_CPU_ONLY) add_library(jit_lto_fatbins_as_cpp_sources STATIC src/detail/jit_lto/AlgorithmPlanner.cu ) - target_include_directories(jit_lto_fatbins_as_cpp_sources PRIVATE "$") - target_compile_options( - jit_lto_fatbins_as_cpp_sources PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" + + # Set PIC for the static library since it will be linked into a shared library + set_target_properties(jit_lto_fatbins_as_cpp_sources PROPERTIES + CUDA_ARCHITECTURES "75-real" + POSITION_INDEPENDENT_CODE ON + ) + + # Make sure the kernels are generated before embedding fatbins + add_dependencies(jit_lto_fatbins_as_cpp_sources ${INTERLEAVED_SCAN_KERNELS_TARGET}) + + target_include_directories(jit_lto_fatbins_as_cpp_sources PRIVATE + "$" ) - target_link_libraries(jit_lto_fatbins_as_cpp_sources PRIVATE CUDA::cuda_driver rmm::rmm raft::raft CCCL::CCCL) + # target_compile_options( + # jit_lto_fatbins_as_cpp_sources PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + # "$<$:${CUVS_CUDA_FLAGS}>" + # ) + target_link_libraries(jit_lto_fatbins_as_cpp_sources PRIVATE CUDA::cuda_driver) embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) diff --git a/cpp/cmake/jit_lto_kernels_list/interleaved_scan.cmake b/cpp/cmake/jit_lto_kernels_list/interleaved_scan.cmake deleted file mode 100644 index 9c3bc41e8f..0000000000 --- a/cpp/cmake/jit_lto_kernels_list/interleaved_scan.cmake +++ /dev/null @@ -1,1300 +0,0 @@ -# ============================================================================= -# Copyright (c) 2025, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - - -# Auto-generated list of interleaved scan kernel files -# Generated by generate_kernels.py - -set(INTERLEAVED_SCAN_KERNEL_FILES - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu - 
src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu - src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu -) diff --git a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake new file mode 100644 index 0000000000..279bd7e7c7 --- /dev/null +++ b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake @@ -0,0 +1,68 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Generate interleaved scan kernel files at build time +function(generate_interleaved_scan_kernels) + find_package(Python3 REQUIRED COMPONENTS Interpreter) + + set(KERNEL_LIST_FILE ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt) + set(GENERATOR_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py) + set(OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels) + set(CMAKE_LIST_FILE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/jit_lto_kernels_list/interleaved_scan.cmake) + set(STAMP_FILE ${CMAKE_CURRENT_BINARY_DIR}/kernels_generated.stamp) + + # Generate the kernels at build time + add_custom_command( + OUTPUT ${STAMP_FILE} + COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} + COMMAND ${CMAKE_COMMAND} -E touch ${STAMP_FILE} + DEPENDS ${KERNEL_LIST_FILE} ${GENERATOR_SCRIPT} + COMMENT "Generating interleaved scan kernel files..." 
+ VERBATIM + ) + + # Create a custom target that depends on the stamp file + add_custom_target(generate_interleaved_scan_kernels_target + DEPENDS ${STAMP_FILE} + ) + + # Include the generated CMake list file + # Only generate if the CMake list file doesn't exist + if(NOT EXISTS ${CMAKE_LIST_FILE}) + message(STATUS "Generating interleaved scan kernels for the first time...") + execute_process( + COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} + WORKING_DIRECTORY ${OUTPUT_DIR} + RESULT_VARIABLE GENERATION_RESULT + ) + + if(NOT GENERATION_RESULT EQUAL 0) + message(FATAL_ERROR "Failed to generate kernel files during configuration") + endif() + endif() + + # Include the generated CMake file + include(${CMAKE_LIST_FILE}) + + # Prepend the source directory path to all files + set(FULL_PATH_FILES) + foreach(kernel_file ${INTERLEAVED_SCAN_KERNEL_FILES}) + list(APPEND FULL_PATH_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${kernel_file}) + endforeach() + + # Return the list to parent scope + set(INTERLEAVED_SCAN_KERNEL_FILES ${FULL_PATH_FILES} PARENT_SCOPE) + set(INTERLEAVED_SCAN_KERNELS_STAMP ${STAMP_FILE} PARENT_SCOPE) + set(INTERLEAVED_SCAN_KERNELS_TARGET generate_interleaved_scan_kernels_target PARENT_SCOPE) +endfunction() diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index 8bf0d393a7..a8059fd381 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -19,6 +19,7 @@ #include "../ivf_common.cuh" #include "../sample_filter.cuh" #include "jit_lto_kernels/interleaved_scan_planner.hpp" +#include "jit_lto_kernels/interleaved_scan_tags.hpp" #include #include @@ -41,6 +42,129 @@ using namespace cuvs::spatial::knn::detail; // NOLINT constexpr int kThreadsPerBlock = 128; +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) + { + const auto diff = x - y; + acc += diff * diff; + 
} +}; + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(uint32_t& acc, uint32_t x, uint32_t y) + { + if constexpr (Veclen > 1) { + const auto diff = __vabsdiffu4(x, y); + acc = raft::dp4a(diff, diff, acc); + } else { + const auto diff = __usad(x, y, 0u); + acc += diff * diff; + } + } +}; + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(int32_t& acc, int32_t x, int32_t y) + { + if constexpr (Veclen > 1) { + // Note that we enforce here that the unsigned version of dp4a is used, because the difference + // between two int8 numbers can be greater than 127 and therefore represented as a negative + // number in int8. Casting from int8 to int32 would yield incorrect results, while casting + // from uint8 to uint32 is correct. + const auto diff = __vabsdiffs4(x, y); + acc = raft::dp4a(diff, diff, static_cast(acc)); + } else { + const auto diff = x - y; + acc += diff * diff; + } + } +}; + +template +struct inner_prod_dist { + __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) + { + if constexpr (Veclen > 1 && (std::is_same_v || std::is_same_v)) { + acc = raft::dp4a(x, y, acc); + } else { + acc += x * y; + } + } +}; + +// Constexpr mapping functions from actual types to tags +template +constexpr auto get_data_type_tag() +{ + if constexpr (std::is_same_v) { return tag_float{}; } + if constexpr (std::is_same_v) { return tag_half{}; } + if constexpr (std::is_same_v) { return tag_int8{}; } + if constexpr (std::is_same_v) { return tag_uint8{}; } +} + +template +constexpr auto get_acc_type_tag() +{ + if constexpr (std::is_same_v) { return tag_acc_float{}; } + if constexpr (std::is_same_v) { return tag_acc_half{}; } + if constexpr (std::is_same_v) { return tag_acc_int32{}; } + if constexpr (std::is_same_v) { return tag_acc_uint32{}; } +} + +template +constexpr auto get_idx_type_tag() +{ + if constexpr (std::is_same_v) { return tag_idx_int64{}; } +} + +template +constexpr auto get_filter_type_tag() 
+{ + using namespace cuvs::neighbors::filtering; + + // Determine the filter implementation tag + if constexpr (std::is_same_v>) { + return tag_filter{}; + } + if constexpr (std::is_same_v>>) { + return tag_filter{}; + } +} + +template +constexpr auto get_metric_tag() +{ + // Get tags for T and AccT + auto t_tag = get_data_type_tag(); + auto acc_tag = get_acc_type_tag(); + + // Check for euclidean_dist and return templated tag with tag types + if constexpr (std::is_same_v>) { + return tag_metric_euclidean{}; + } + // Check for inner_prod_dist and return templated tag with tag types + if constexpr (std::is_same_v>) { + return tag_metric_inner_product{}; + } +} + +template +constexpr auto get_post_lambda_tag() +{ + using namespace raft; + + if constexpr (std::is_same_v) { return tag_post_identity{}; } + if constexpr (std::is_same_v) { return tag_post_sqrt{}; } + if constexpr (std::is_same_v, raft::mul_const_op>>) { + return tag_post_compose{}; + } +} + /** * @brief Copy `n` elements per block from one place to another. 
* @@ -974,7 +1098,10 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) * Configure the gridDim.x to maximize GPU occupancy, but reduce the output size */ // template -uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMemSize, CUkernel func) +inline uint32_t configure_launch_x(uint32_t numQueries, + uint32_t n_probes, + int32_t sMemSize, + CUkernel func) { int dev_id; RAFT_CUDA_TRY(cudaGetDevice(&dev_id)); @@ -1028,7 +1155,14 @@ void launch_kernel(Lambda lambda, // IvfSampleFilterT, // Lambda, // PostLambda>; - auto kernel_planner = InterleavedScanPlanner( + + // Use tag types for the planner to avoid template bloat + auto kernel_planner = InterleavedScanPlanner()), + decltype(get_acc_type_tag()), + decltype(get_idx_type_tag()), + decltype(get_filter_type_tag()), + decltype(get_metric_tag()), + decltype(get_post_lambda_tag())>( Capacity, Veclen, Ascending, ComputeNorm); auto kernel_launcher = kernel_planner.get_launcher(); @@ -1114,59 +1248,6 @@ void launch_kernel(Lambda lambda, } } -template -struct euclidean_dist { - __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) - { - const auto diff = x - y; - acc += diff * diff; - } -}; - -template -struct euclidean_dist { - __device__ __forceinline__ void operator()(uint32_t& acc, uint32_t x, uint32_t y) - { - if constexpr (Veclen > 1) { - const auto diff = __vabsdiffu4(x, y); - acc = raft::dp4a(diff, diff, acc); - } else { - const auto diff = __usad(x, y, 0u); - acc += diff * diff; - } - } -}; - -template -struct euclidean_dist { - __device__ __forceinline__ void operator()(int32_t& acc, int32_t x, int32_t y) - { - if constexpr (Veclen > 1) { - // Note that we enforce here that the unsigned version of dp4a is used, because the difference - // between two int8 numbers can be greater than 127 and therefore represented as a negative - // number in int8. Casting from int8 to int32 would yield incorrect results, while casting - // from uint8 to uint32 is correct. 
- const auto diff = __vabsdiffs4(x, y); - acc = raft::dp4a(diff, diff, static_cast(acc)); - } else { - const auto diff = x - y; - acc += diff * diff; - } - } -}; - -template -struct inner_prod_dist { - __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) - { - if constexpr (Veclen > 1 && (std::is_same_v || std::is_same_v)) { - acc = raft::dp4a(x, y, acc); - } else { - acc += x * y; - } - } -}; - /** Select the distance computation function and forward the rest of the arguments. */ template ' + + # Distance metric (param 8: Lambda) + elif param_index == 8: + # Extract veclen from the Lambda type + veclen_match = re.search(r'<(\d+),', param_value) + veclen = veclen_match.group(1) if veclen_match else all_params[1] + + # Get tags for T and AccT + T_tag = param_to_tag(4, all_params[4], all_params) + AccT_tag = param_to_tag(5, all_params[5], all_params) + + # Return templated tag based on metric type + if 'euclidean_dist' in param_value: + return f'tag_metric_euclidean<{veclen}, {T_tag}, {AccT_tag}>' + elif 'inner_prod_dist' in param_value: + return f'tag_metric_inner_product<{veclen}, {T_tag}, {AccT_tag}>' + return param_value + + # Post-processing lambda (param 9: PostLambda) + elif param_index == 9: + if 'identity_op' in param_value: + return 'tag_post_identity' + elif 'sqrt_op' in param_value: + return 'tag_post_sqrt' + elif 'compose_op' in param_value: + return 'tag_post_compose' + return param_value + + return param_value + + def generate_cuda_file_content(params): """Generate the content of a CUDA kernel file.""" filename = generate_register_function_name(params) @@ -149,8 +231,9 @@ def generate_cuda_file_content(params): # Format template parameters for the template instantiation (all 10 params) template_params = ', '.join(params) - # Format template parameters for registerAlgorithm (params 4-9, excluding first four) - register_template_params = ', '.join(params[4:]) + # Convert params 4-9 to tag types for registerAlgorithm + tag_params = 
[param_to_tag(i, params[i], params) for i in range(4, 10)] + register_template_params = ', '.join(tag_params) # Create the string parameter with first four params (Capacity, Veclen, Ascending, ComputeNorm) string_param = f"interleaved_scan_kernel_{params[0]}_{params[1]}_{params[2]}_{params[3]}" @@ -171,19 +254,21 @@ def generate_cuda_file_content(params): * limitations under the License. */ -#include "../ivf_flat_interleaved_scan.cuh" - #ifdef BUILD_KERNEL +#include "../../ivf_flat_interleaved_scan.cuh" + template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<{template_params}>({params[8]}, {params[9]}, unsigned int, {params[4]} const*, unsigned int const*, {params[4]} const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, {params[7]}, unsigned int*, float*); #else #include "{filename}.h" #include +#include "../interleaved_scan_tags.hpp" __attribute__((__constructor__)) static void register_{filename}() {{ +using namespace cuvs::neighbors::ivf_flat::detail; registerAlgorithm< {register_template_params}>("{string_param}", {embedded_var_name}, @@ -197,8 +282,11 @@ def generate_cuda_file_content(params): def main(): - # Read the kernels file (now in the same directory) - kernels_file = Path('interleaved_scan_kernels.txt') + # Get the script directory to find the kernels file + script_dir = Path(__file__).parent.absolute() + + # Read the kernels file (in the same directory as this script) + kernels_file = script_dir / 'interleaved_scan_kernels.txt' if not kernels_file.exists(): print(f"Error: {kernels_file} not found!") return @@ -206,8 +294,8 @@ def main(): with open(kernels_file, 'r') as f: lines = f.readlines() - # Output directory (current directory) - output_dir = Path('.') + # Output directory (same directory as the script) + output_dir = script_dir # Parse all kernels and generate files generated_files = [] @@ -244,10 +332,17 @@ def main(): filename = 
generate_filename(params) file_content = generate_cuda_file_content(params) - # Write file + # Write file only if it doesn't exist or content has changed output_file = output_dir / filename - with open(output_file, 'w') as f: - f.write(file_content) + should_write = True + if output_file.exists(): + with open(output_file, 'r') as f: + existing_content = f.read() + should_write = (existing_content != file_content) + + if should_write: + with open(output_file, 'w') as f: + f.write(file_content) generated_files.append(filename) @@ -257,16 +352,33 @@ def main(): print(f"\nGenerated {len(generated_files)} CUDA kernel files") # Generate CMake file with all filenames - cmake_file = Path('../../../../cmake/jit_lto_kernels_list') / 'interleaved_scan.cmake' - with open(cmake_file, 'w') as f: - f.write("# Auto-generated list of interleaved scan kernel files\n") - f.write("# Generated by generate_kernels.py\n\n") - f.write("set(INTERLEAVED_SCAN_KERNEL_FILES\n") - for filename in sorted(generated_files): - f.write(f" src/neighbors/ivf_flat/jit_lto_kernels/{filename}\n") - f.write(")\n") - - print(f"Generated CMake file: {cmake_file}") + # We're generating in the source tree at: cpp/src/neighbors/ivf_flat/jit_lto_kernels/ + # CMake file goes to: cpp/cmake/jit_lto_kernels_list/ + cmake_dir = script_dir.parent.parent.parent.parent / 'cmake' / 'jit_lto_kernels_list' + cmake_dir.mkdir(parents=True, exist_ok=True) + cmake_file = cmake_dir / 'interleaved_scan.cmake' + + # Generate CMake content + cmake_content = "# Auto-generated list of interleaved scan kernel files\n" + cmake_content += "# Generated by generate_kernels.py\n\n" + cmake_content += "set(INTERLEAVED_SCAN_KERNEL_FILES\n" + for filename in sorted(generated_files): + cmake_content += f" src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels/{filename}\n" + cmake_content += ")\n" + + # Only write if content has changed + should_write_cmake = True + if cmake_file.exists(): + with open(cmake_file, 'r') as f: + 
existing_cmake = f.read() + should_write_cmake = (existing_cmake != cmake_content) + + if should_write_cmake: + with open(cmake_file, 'w') as f: + f.write(cmake_content) + print(f"Updated CMake file: {cmake_file}") + else: + print(f"CMake file unchanged: {cmake_file}") if __name__ == '__main__': main() diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index d00bc40450..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index f0e2e2f906..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 0179171be9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 59d418bc01..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 4775a5f361..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index 0c28b4164f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu deleted 
file mode 100644 index 943fd3247a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 8df6900f3d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index f9a7e499c6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 3caf608abe..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 959080398a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index de88e310b0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_false_false", - embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 46c0cb5193..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_16_false_true", - embedded_interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_0_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 35b2de35d1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_16_false_true", - embedded_interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_0_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 67c9d2010c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_16_false_true", - 
embedded_interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_0_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index c1630265e6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_16_false_true", - embedded_interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_0_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 65ff443459..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * 
Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff 
--git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 290f5889bb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 278b9d8fc6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index c66eda1c33..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, 
signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index bdda3c39bf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index 211bb65cd7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 
100644 index d72f32931b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index ce7009d993..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index af46ae904e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index c0914e8406..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index f2c57778cd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 1c5d592e0d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_16_true_false", - embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_0_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 970656ed25..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_16_true_true", - embedded_interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_0_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 6c90ed5644..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_16_true_true", - embedded_interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_0_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index d11dd45984..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_16_true_true", - 
embedded_interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_0_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 63c5c08309..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_16_true_true", - embedded_interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_0_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index da4f8038d5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 
(c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 33e8352483..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index 580fb44149..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index edc330b61a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 73ef0a45d0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 34327a4358..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index f0b9914492..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index a9bd061376..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 491bc49ed7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index fb9f4df57e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 4f089adb87..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 8ed094b4eb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 778b3e740f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index dea20518c0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 1acd28fd47..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index 4c5f70f24d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file 
mode 100644 index 5a18844a55..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_false_false", - 
embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 154955c2d0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index 488ef48da4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 692911cbbc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index e305ae7102..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 7e66cee018..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index e544ac9ade..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 6c274ae690..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_false_false", - embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index ed6bec67d3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_false_true", - embedded_interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 17295b7515..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_false_true", - embedded_interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 1bb3691c13..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_false_true", - embedded_interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index d2e16b5adf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_false_true", - embedded_interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 81a880b7b2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_false_true", - embedded_interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 48ab435ceb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_false_true", - embedded_interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 6d32258ebc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_false_true", - 
embedded_interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 82f015e63b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_false_true", - embedded_interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index cb10fc094a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index d49001e12a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index ce9973a95c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index b92dc63a75..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 5e901504a6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index c0e3e48723..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 3ff28fd79a..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
- raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 5c20937d09..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 8eab1fa199..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index e0b6c12be8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 4d1a11888b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index a0d86086b2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 907a9f521f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 67c6d47c03..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 92d1f60583..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm< - signed char, - int, - long, 
- cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>("interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index b388686f90..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index b1c0ff0ac1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 
f8ebb0e5d3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_0_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index fa85e1fe3f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index aa3c9475c1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index cf0d8d0f37..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 6190621778..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id, - 
sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 0e7de664ec..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 795b9cd608..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_0_1_true_false", - embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_0_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 476e0135c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_true_true", - embedded_interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index bd58d97e16..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_true_true", - embedded_interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index aae27d0152..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_true_true", - embedded_interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index 50e0352998..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_true_true", - embedded_interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index a91c046f41..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_true_true", - embedded_interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index c95b0c10af..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_true_true", - embedded_interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 4b8fcf4c02..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_true_true", - 
embedded_interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index ae2d745de5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_1_true_true", - embedded_interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_0_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 2b2b96ace9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_0_4_false_false", - embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 83d8af9ca3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_4_false_false", - embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index f2d0e98389..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_0_4_false_false", - embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index cc37fff00b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_0_4_false_false", - embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index c6db4f78b9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_4_false_false", - embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 422b31652c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_0_4_false_false", - embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_0_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index f01a4fa5a6..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_4_false_true", - 
embedded_interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_0_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 0f662c6948..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_4_false_true", - embedded_interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_0_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index a2676facf3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_0_4_true_false", - embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
deleted file mode 100644 index 7962f45919..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_4_true_false", - 
embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 05d05e32bd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_0_4_true_false", - embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index aa9f0e22e0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_0_4_true_false", - embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index da8a232009..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - 
"interleaved_scan_kernel_0_4_true_false", - embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 0ffc4f4696..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_0_4_true_false", - embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_0_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 461723b693..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_4_true_true", - embedded_interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_0_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index c1464eb2da..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_4_true_true", - embedded_interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_0_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index f54e2f537e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_8_false_false", - embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index a956572f36..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_8_false_false", - embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index cebf4d8171..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_0_8_false_false", - embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 92c12d03af..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_8_false_false", - embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 6bd137d57f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_8_false_false", - embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 09fa2e4b0a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_8_false_false", - embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_0_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 3e064b22a7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_8_false_true", - embedded_interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_0_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 66fec75011..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_8_false_true", - embedded_interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_0_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 70f49c517c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_8_true_false", - embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id, - 
sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 76e2117710..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_8_true_false", - embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index f2e28dbb1c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_0_8_true_false", - embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 2f191f1acd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_8_true_false", - embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index dd8cca7f9d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_0_8_true_false", - embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index ba222c208e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_0_8_true_false", - embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_0_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index ff4c740d2a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_8_true_true", - embedded_interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_0_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 2c52558937..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 0, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_0_8_true_true", - embedded_interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_0_8_true_true_h_h_l_n_inner_8_compose)); -} 
- -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 87f3ad2eec..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index e027047b07..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 95ae978726..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index c2b8bf3ff0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 34395eef54..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index 9511e59037..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index e59ab34eb3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - 
registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 835c111636..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index c1fa3db4bc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * 
Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 01f4cbb69d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index a5f815a11c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index a380f12584..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_false_false", - embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 2ef7a760e1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_16_false_true", - embedded_interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_128_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff 
--git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 6983e19c2c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_16_false_true", - embedded_interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_128_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 10f6f98154..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_16_false_true", - 
embedded_interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_128_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index b8556d8387..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_16_false_true", - embedded_interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_128_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 72e9167bf6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ 
-1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 4f0b292597..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 57a3df97e0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 68f2aa96a7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index af086e5511..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu 
deleted file mode 100644 index 2ef3c98ff8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_true_false", - 
embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index 1f6d0aac12..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 2172ef974d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_16_true_false", - 
embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index baac53ad25..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 852f0ffd67..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 6916ae7446..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index cb5d1422b4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_16_true_false", - embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_128_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 5402e8f0c7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_16_true_true", - embedded_interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_128_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 73ad7f4081..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_16_true_true", - embedded_interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_128_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index df0231ddbb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_16_true_true", - embedded_interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_128_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index e22fa0925d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null 
@@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - 
"interleaved_scan_kernel_128_16_true_true", - embedded_interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_128_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index b37f8190ca..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 24859b224a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index 97dc103c39..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index 1578667fb1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index ee000933d6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 
e2941f7e3f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_128_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 3295168c4a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 79d3caad57..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 6e2f602a12..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index a72f773b20..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index d864dc19ac..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 3f41496c69..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index d44c107782..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index bb35a3534c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index fbba323559..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index ee06503981..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index e411695221..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index c7a4d2092d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file 
mode 100644 index cfb94b212f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index c7dbe0ca1f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 78772dbf04..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 7517105d71..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 7cbecb2a4e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 83bbb93cf8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_false_false", - embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 4a3e900608..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_false_true", - embedded_interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 7793be4371..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_false_true", - embedded_interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 48bb13f1cb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_false_true", - embedded_interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index b27d5fb080..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_false_true", - embedded_interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index a23ff75f32..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_false_true", - embedded_interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index d094565850..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_false_true", - embedded_interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index f41f4fcc1c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_false_true", - 
embedded_interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 05dbb5ef68..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_false_true", - embedded_interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 1a52df7dc0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * 
Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 278dbeb71d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index 3b7fb0f9fe..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu 
deleted file mode 100644 index bf9c5077a5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - 
embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index fe1926a544..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index d9d6a1f65e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 653ced6fdb..0000000000 
--- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
__half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index bf6a8a1c26..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index f751e803a6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index 8ed4dce780..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index d014f48b27..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 38b815194e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 650e48e9ac..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index f3d53e83b1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 4d35af67c1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index de1fcb6a36..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 6340c2511e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index febd9c7363..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 
100644 index b4197e7fae..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 34fbc7f821..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index a9d949c1ad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 7e6962b055..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index d95aff51fd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 86df80ac87..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_128_1_true_false", - embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_128_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 42d31e7ed0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_true_true", - embedded_interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index d19bbcac33..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_true_true", - embedded_interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 18e99098ef..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_true_true", - embedded_interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index e597e0a0aa..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_true_true", - embedded_interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index c0f8acb75c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_true_true", - embedded_interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 9e798b0dfa..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_true_true", - embedded_interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 9973ca8acc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_true_true", - 
embedded_interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 4a073ed5f5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_1_true_true", - embedded_interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_128_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index e57312873e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * 
Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_4_false_false", - embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 36df23bd98..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_4_false_false", - embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index ff3c668629..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_4_false_false", - embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 09f044e481..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - 
raft::identity_op>( - "interleaved_scan_kernel_128_4_false_false", - embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index bb15eedb97..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_4_false_false", - embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 7f5a871914..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_4_false_false", - embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_128_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 
b0f03304f3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_4_false_true", - 
embedded_interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_128_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 543cb0f8f3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_4_false_true", - embedded_interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_128_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 2016405c84..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_4_true_false", - embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 73e5ca4f0d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_4_true_false", - embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index c624c73c3a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_4_true_false", - embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index bf76e1ac70..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_4_true_false", - embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 
45a5acd43f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_4_true_false", - embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 2c9ea7b93e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_128_4_true_false", - embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_128_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 7d2b9bfe88..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_4_true_true", - embedded_interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_128_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index a035a55018..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_4_true_true", - embedded_interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_128_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 8eab95f347..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_8_false_false", - embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 9fb210445a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_8_false_false", - embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index c91008868a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_8_false_false", - embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 2281cf6e4f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_8_false_false", - embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 427175bd6d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_8_false_false", - embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 337728e6f5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_8_false_false", - embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_128_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 5b708524ed..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_8_false_true", - embedded_interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_128_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 1b5847c76f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_8_false_true", - embedded_interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose, - 
sizeof(embedded_interleaved_scan_kernel_128_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 8a8a7e9324..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_8_true_false", - embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index d28209a103..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_8_true_false", - embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 8ef55b7351..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_8_true_false", - embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 03da5806be..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_8_true_false", - embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 2ed48617ce..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_128_8_true_false", - embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index d8ac6bc9da..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_128_8_true_false", - embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_128_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 2a58caa110..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, 
- cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_8_true_true", - embedded_interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_128_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index c069a00047..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 128, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_128_8_true_true", - embedded_interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_128_8_true_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 92eefc2039..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 1413cdafa4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index abb27de5ca..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index fa37cb8b61..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index d0e53d186b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu 
deleted file mode 100644 index 64d73b4495..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_false_false", - 
embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index 4dc43726a8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 604d6139fd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_16_false_false", - 
embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index c42a8d7272..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index db6b777b6c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 5f395a56b7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 253acbffad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_false_false", - embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_16_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index d9e0708b33..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_16_false_true", - embedded_interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_16_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 6f0260aa00..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_16_false_true", - embedded_interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_16_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 191099a256..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_16_false_true", - embedded_interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_16_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 16245b7e86..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null 
@@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - 
"interleaved_scan_kernel_16_16_false_true", - embedded_interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_16_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 653cf15163..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index cdbd9ea8bb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 3b1bb91633..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index dafacc91f0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 021e33b021..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index 5daf8f72fc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted 
file mode 100644 index 3e50b13e2e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index bc97285469..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index f057ad2263..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 0aaa596db2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 1f4dbb563f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 552d5a50a5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_16_true_false", - embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_16_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 758857e205..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_16_true_true", - embedded_interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_16_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 9afa17102e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_16_true_true", - embedded_interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_16_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 342d405807..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_16_true_true", - 
embedded_interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_16_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 60441c53af..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_16_true_true", - embedded_interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_16_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index acf382fc91..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * 
Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 5f83c251e3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index d5f74b39a2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu 
deleted file mode 100644 index 9bbc77bd1e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - 
embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index a77770d297..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index f00c25a49c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 22b026d4ab..0000000000 
--- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
__half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index a36fcc927b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 55cab64d9c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index d1679f93fc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 800628e98e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 9371730a2f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 10a70a4f71..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 31580d2a4c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index a7ba0c42a8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index b793c66b94..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index fcae8b176a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 2603801a52..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 
100644 index e66b965708..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index bbab35578d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 5b09b40e14..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index de74044313..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 7ad19ce463..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 3442337c80..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_false_false", - embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index b18813def3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_false_true", - embedded_interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 8faaa15429..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_false_true", - embedded_interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 22e6bab8f6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_false_true", - embedded_interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index 108a8f7b7d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_false_true", - embedded_interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 163cd3dfa2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_false_true", - embedded_interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index e9f6150014..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_false_true", - embedded_interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index be90918baf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_false_true", - 
embedded_interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 5c20a24283..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_false_true", - embedded_interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 854f544e4b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 
(c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 583fbe3341..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index b0665ac932..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index 7960cd58e5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 71d11af7ad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 4a7ddb65e4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 493f330640..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 6d204dcace..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index d405cf90c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index a4c76a451f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 9b97001867..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index d5b2f36a46..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 4abed39251..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index ab06d74e29..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index bdcbeec0cb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index c5ab6751b3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file 
mode 100644 index a9b3668479..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_true_false", - 
embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index e17836193d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index 3f6656a74c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 981d046ce8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 5d82b1de54..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 4def8a05cb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 073e517b5e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index aab4de1489..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_16_1_true_false", - embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_16_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 3f8dfe9c9a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_true_true", - embedded_interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index bece6b1112..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_true_true", - embedded_interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 7cb5d6fab6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_true_true", - embedded_interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index d5a6d4144e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_true_true", - embedded_interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index ece693de83..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_true_true", - embedded_interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 353dacd3c5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_true_true", - embedded_interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 675a02825e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_true_true", - 
embedded_interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 2697ef00f9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_1_true_true", - embedded_interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_16_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 250e0b0c9e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 
2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_4_false_false", - embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 9e148b175b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_4_false_false", - embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 90a90ea7ad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_4_false_false", - embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu 
deleted file mode 100644 index 68009aae5e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_4_false_false", - 
embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 18aaba6e87..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_4_false_false", - embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index dee3e092ee..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_4_false_false", - embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_16_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 63c633165b..0000000000 
--- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_4_false_true", - 
embedded_interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_16_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 921cdcafaf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_4_false_true", - embedded_interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_16_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index d7c04730dc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_4_true_false", - embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index cfe442710f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_4_true_false", - embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 2932e3dec6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_16_4_true_false", - embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 0bca2a8d06..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_4_true_false", - embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index b05dc7a4e0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - 
"interleaved_scan_kernel_16_4_true_false", - embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 616cc70c6f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_16_4_true_false", - embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_16_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 96465efddc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_4_true_true", - embedded_interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_16_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index e88abbb91c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_4_true_true", - embedded_interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_16_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index bbab804bd8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_8_false_false", - embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 108772575d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_8_false_false", - embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 1d7367270e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_8_false_false", - embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 0a93971cad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_8_false_false", - embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 180d653306..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_8_false_false", - embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 26a619cf6e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_8_false_false", - embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_16_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index a46ca7cdaa..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, 
- cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_8_false_true", - embedded_interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_16_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index c8c8c5a3c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_8_false_true", - embedded_interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_16_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 470fb87186..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_8_true_false", - embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id, - 
sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 32456a1b61..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_8_true_false", - embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index b97bc1b909..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_16_8_true_false", - embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 8348b1a06a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_8_true_false", - embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 8847ca90ad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_16_8_true_false", - embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index c3cc978542..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_16_8_true_false", - embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_16_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 53e83996a1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_8_true_true", - embedded_interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_16_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index f1549bc17d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 16, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_16_8_true_true", - embedded_interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_16_8_true_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index d313b313a7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index bfe6bdfcc7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 6ec34f5a9e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 7442e84634..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 99a090ed4b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu deleted file 
mode 100644 index 93bc8ec319..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_false_false", - 
embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index 9688313a02..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 195350cd5d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index c36ee524b0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 120fc361fe..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 85e62e4717..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 8bfac9f336..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_false_false", - embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_1_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index a2d5e7748d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_16_false_true", - embedded_interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_1_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index d3ca2cb6a2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_16_false_true", - embedded_interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_1_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 59cb6e0d81..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_16_false_true", - embedded_interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_1_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 82b146a4ef..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 
+0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_16_false_true", - 
embedded_interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_1_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index b34266cd12..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 4554e91ede..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index b064db1bbc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 1b94324b95..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu 
deleted file mode 100644 index 60bf7eb50a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_16_true_false", - 
embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index 0e8416441e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index 62077201fc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index e4f1693464..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index 40a662bb23..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 2bd5f5fed9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 67c5ef3cd8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index d16449cf36..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_16_true_false", - embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_1_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index cfc27da409..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_16_true_true", - embedded_interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_1_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 0067a25bfa..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_16_true_true", - embedded_interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_1_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 3e5cf49c48..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_16_true_true", - 
embedded_interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_1_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index f79920d34d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_16_true_true", - embedded_interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_1_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 976a7f0e1c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 
(c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index e87f292422..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index adc59f0f6c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index 7c24d7dd56..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index c41d2b365b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index e1b95d5bb2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 82ff7537b5..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 5c64bdefba..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 3b9f319648..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index 4469a2e956..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 6681a34e72..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index bff74da591..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 5296798d1f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 6988150da8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index b68a1bf0b3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index fabc4942b6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file 
mode 100644 index c64303a78d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_false_false", - 
embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 6786bc3e83..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index 133112f435..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 7fa523d13b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index e6b0975d90..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 1f2c1e55d8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 4395e4a561..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 0161055330..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_false_false", - embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 2a57362c41..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_false_true", - embedded_interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 1d71639470..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_false_true", - embedded_interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index cc3f2abbb8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_false_true", - embedded_interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index db73777fdf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_false_true", - embedded_interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 12acd5f1ed..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_false_true", - embedded_interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 3bc8461ffc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_false_true", - embedded_interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index dd5be50218..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_false_true", - 
embedded_interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 8967bb9f7e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_false_true", - embedded_interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 687a786004..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 4284a60429..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index ece0076470..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index d0d19a9db4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 609f7d177d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 0decac2999..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 98b24a0a68..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
- raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 8130c8ea70..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index bd68489509..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index 05f09d8310..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 7ffb1dbd6b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index ce25bf8bb7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index da687ef63c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index d808a82d52..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 370b5db447..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm< - signed char, - int, - long, 
- cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>("interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index 47081c5141..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 11dfbde90c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 
285780ba31..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_1_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index 895c216a37..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 6c435e5c7a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 91f99813b1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 07fb6fa949..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id, - 
sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index ff21d82fe2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index deadb211bf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_1_1_true_false", - embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_1_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index e3ebdb71c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_true_true", - embedded_interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 10299f9d02..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_true_true", - embedded_interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index c35d53476f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_true_true", - embedded_interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index ca3dd1b462..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_true_true", - embedded_interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index acccf76a18..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_true_true", - embedded_interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 604450177c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_true_true", - embedded_interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index ae17907237..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_true_true", - 
embedded_interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 67ebe7b2d6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_1_true_true", - embedded_interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_1_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 0546f1a99d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_1_4_false_false", - embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 7ae054b18c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_4_false_false", - embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index fa40b13fee..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_1_4_false_false", - embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index a833d09554..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_1_4_false_false", - embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index fb8152448e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_4_false_false", - embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index ac580f6066..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_1_4_false_false", - embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_1_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 32ebdce235..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_4_false_true", - 
embedded_interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_1_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 8d4c94c2d2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_4_false_true", - embedded_interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_1_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index c671d9c54a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_1_4_true_false", - embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
deleted file mode 100644 index 14130e2c68..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_4_true_false", - 
embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 7823c9712d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_1_4_true_false", - embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 8cd1de90c7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_1_4_true_false", - embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 0c962ce69c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - 
"interleaved_scan_kernel_1_4_true_false", - embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 864e1d21d7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_1_4_true_false", - embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_1_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 6a938e9254..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_4_true_true", - embedded_interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_1_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 92e9c626ad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_4_true_true", - embedded_interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_1_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 399b4615a6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_8_false_false", - embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 23269a4bf5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_8_false_false", - embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index f52f11e39d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_1_8_false_false", - embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index e071932889..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_8_false_false", - embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 7b8d7cc5a9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_8_false_false", - embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 2c20f0c62b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_8_false_false", - embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_1_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 1c2437b39e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_8_false_true", - embedded_interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_1_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 1c0f18d060..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_8_false_true", - embedded_interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_1_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 08022a0fb5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_8_true_false", - embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id, - 
sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index edd92daa39..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_8_true_false", - embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 4b731550c9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_1_8_true_false", - embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 77d80a328f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_8_true_false", - embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index a62a8e96f0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_1_8_true_false", - embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 351209b2e1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_1_8_true_false", - embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_1_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index beef9b5aad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_8_true_true", - embedded_interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_1_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 2a63e36aec..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 1, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_1_8_true_true", - embedded_interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_1_8_true_true_h_h_l_n_inner_8_compose)); -} 
- -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 2333d6b8e0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 920f2a7ec8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index a9883674fc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 7597579be0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 897cf3b156..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index 5aed2efb0c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index bcb3380592..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - 
registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 2aca305bde..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index a21e7615bf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * 
Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index e8785f8db9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 890fed00c3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 887593c2e7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_false_false", - embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index fb6f191657..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_16_false_true", - embedded_interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_256_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff 
--git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 69824c409f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_16_false_true", - embedded_interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_256_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index d444814b43..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_16_false_true", - 
embedded_interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_256_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 65bcf4285e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_16_false_true", - embedded_interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_256_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 91bfdded0e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ 
-1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 1fc0fe4202..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 1581716962..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 1b0d7b4792..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 914f2088e5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu 
deleted file mode 100644 index c9bc56d2ee..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_true_false", - 
embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index 5089982bd9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 8332c68c41..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_16_true_false", - 
embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index 93cc64df3c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 19a66eccdb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 8e712a4dea..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index d9cf9037de..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_16_true_false", - embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_256_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 55d904bd1c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_16_true_true", - embedded_interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_256_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 310294888a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_16_true_true", - embedded_interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_256_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 0b875f9278..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_16_true_true", - embedded_interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_256_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index f8a526cecc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null 
@@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - 
"interleaved_scan_kernel_256_16_true_true", - embedded_interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_256_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 27892d3882..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 1bbbea185f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index b2f3329df8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index 4fbe8473c0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index fc84311858..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 
4756b6afe1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_256_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 9a28d4df87..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 4c188e2f37..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 4d9dddd0f1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index 5c5fef9b1e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index e20df45ffc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index e23e98b699..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index aa71191abd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index bd755c5131..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 4aa45ac61b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index 5bbbe357d0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 58f80c0744..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index a51683b078..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file 
mode 100644 index a45d054588..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index d787a38d89..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 993c049805..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 04d501aaba..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index ecf6bfd879..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 6a672aa224..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_false_false", - embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 51f7020bbf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_false_true", - embedded_interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index df022fc329..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_false_true", - embedded_interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 9c6e78d06c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_false_true", - embedded_interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index e791b5f434..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_false_true", - embedded_interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 302c981f8c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_false_true", - embedded_interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index c596a720d5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_false_true", - embedded_interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 47d446651e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_false_true", - 
embedded_interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 03a8571341..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_false_true", - embedded_interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 2565dcc23b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * 
Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index ef7c625f19..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index 4a3e67d690..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu 
deleted file mode 100644 index 7d249e12c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - 
embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 4c852c3574..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 770fa60d3b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 6221cd3450..0000000000 
--- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
__half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 8a7691e777..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 07a5bee770..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index e88fcd9729..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 55aae51950..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 99c4f33b58..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index ff4da51ccd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 91a073d140..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 7bdd72fd4a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index e2dfc52ae6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 961f1d677e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 5205a223dd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 
100644 index b7b2526d3b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 6f9e638ef2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 7b18b874ff..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index deb65e968e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index a7b59706d3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 9bb5161ab2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_256_1_true_false", - embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_256_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 6272eb87c8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_true_true", - embedded_interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 2c5edd6258..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_true_true", - embedded_interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index b1cd6f2eb6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_true_true", - embedded_interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index 898783af74..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_true_true", - embedded_interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 4ff2af5fc0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_true_true", - embedded_interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 95f4a432ca..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_true_true", - embedded_interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 4fc319d843..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_true_true", - 
embedded_interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 0873fa7234..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_1_true_true", - embedded_interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_256_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 1930040548..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * 
Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_4_false_false", - embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index edfb18876a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_4_false_false", - embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 2851a535e7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_4_false_false", - embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 324f0e0066..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - 
raft::identity_op>( - "interleaved_scan_kernel_256_4_false_false", - embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 71fd238fa6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_4_false_false", - embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 9e41fc3142..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_4_false_false", - embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_256_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 
916dc598ed..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_4_false_true", - 
embedded_interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_256_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 042ae90f1e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_4_false_true", - embedded_interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_256_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 1ea25107f3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_4_true_false", - embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 623e7fcec2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_4_true_false", - embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index adf12e96c2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_4_true_false", - embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 5b42d70628..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_4_true_false", - embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 
b7952ac332..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_4_true_false", - embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index de5336f36a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_256_4_true_false", - embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_256_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 34fd019ddc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_4_true_true", - embedded_interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_256_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 7a5aba65b1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_4_true_true", - embedded_interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_256_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 302773ac7c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_8_false_false", - embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 5ed74f57f4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_8_false_false", - embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index f6bb6cc680..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_8_false_false", - embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 75fdc6980b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_8_false_false", - embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 61c94df89f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_8_false_false", - embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index c889f57f48..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_8_false_false", - embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_256_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 6abbca5f4d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_8_false_true", - embedded_interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_256_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 5e4ceb3a6c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_8_false_true", - embedded_interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose, - 
sizeof(embedded_interleaved_scan_kernel_256_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 7b56cd27ac..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_8_true_false", - embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 2a6accb3db..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_8_true_false", - embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 577d60c51a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_8_true_false", - embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 384cca6d9c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_8_true_false", - embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index fcca5a5df7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_256_8_true_false", - embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 5ee3dc5eb8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_256_8_true_false", - embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_256_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 3b75ca6b4a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, 
- cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_8_true_true", - embedded_interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_256_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index f04373839e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 256, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_256_8_true_true", - embedded_interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_256_8_true_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 851dafea08..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 326d16a5b3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 03b0fb1094..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 84b62997e8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 4592a42bfc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu deleted file 
mode 100644 index e583c98a16..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_false_false", - 
embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index e4399ffeed..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 0f8c6a6539..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index eba35b2feb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 74210afadf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 112c5ceb3b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index c7c9150318..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_false_false", - embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_2_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index e09a83dbf5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_16_false_true", - embedded_interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_2_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 9c28eee47a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_16_false_true", - embedded_interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_2_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 657cd8a30f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_16_false_true", - embedded_interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_2_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 5acaf35711..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 
+0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_16_false_true", - 
embedded_interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_2_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index b9eeced6c7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index a11aa58461..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index ca7f591f82..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 9d7964db0e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu 
deleted file mode 100644 index 3fad4569a6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_16_true_false", - 
embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index 6a41f637cf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index b657a3054b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index de0305103d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index ff95ad66b5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 85a07e6c1f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index b33bf7dfaa..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index df8b79da7d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_16_true_false", - embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_2_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 12430d9ccf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_16_true_true", - embedded_interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_2_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index f34d479777..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_16_true_true", - embedded_interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_2_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index d3def5e132..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_16_true_true", - 
embedded_interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_2_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 4c1918727d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_16_true_true", - embedded_interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_2_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 3b1a844eb3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 
(c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index df6a7c7bbf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index e5577edebc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index dad28040c7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 0c40e25845..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 7e524ddf4f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index bb3db05e08..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 2e736f9dbd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 32f734cad3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index 052119a7cb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 71400a0303..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index b14d644983..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 99d128ea01..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 5570d00fda..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 3932281de2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index 7fea0189db..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file 
mode 100644 index 496c158434..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_false_false", - 
embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 65c152c081..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index 4d7653d46d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 96a79e0c64..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index aeadb7ec54..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 7b4757c326..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 7aff0d2a18..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index ce6f039b3d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_false_false", - embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index df68427895..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_false_true", - embedded_interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 68458a3f35..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_false_true", - embedded_interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index b4e685fc87..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_false_true", - embedded_interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index dc30eb63a8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_false_true", - embedded_interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index b956b82daf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_false_true", - embedded_interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 428ad7e950..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_false_true", - embedded_interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 9b22f6c4ba..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_false_true", - 
embedded_interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 3418b4c604..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_false_true", - embedded_interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 5072ea8489..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 37769c2376..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index 56a3448df1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index 0f34bc8104..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index af5984cacb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 126aa6791a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 6404610b13..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
- raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 699a2d42dd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index c38e2b0ed7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index d7f5c0d6b4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 5a00e165b3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 2a639b3d28..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index d7d3984312..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 61ad677621..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 4819f16668..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm< - signed char, - int, - long, 
- cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>("interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index e5283b327e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 84ab7877b8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 
0aa0e7b30d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_2_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index 8d0be68864..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index c066fbada5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 09d64286ee..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index b384a02bad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id, - 
sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index c28d6bd6a2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index cc4d935699..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_2_1_true_false", - embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_2_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 906814d41a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_true_true", - embedded_interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 8eb5c381e1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_true_true", - embedded_interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index ae8bc90488..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_true_true", - embedded_interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index 9e3b46217d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_true_true", - embedded_interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 5e017873dc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_true_true", - embedded_interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 2451e28019..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_true_true", - embedded_interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 146a0eb20f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_true_true", - 
embedded_interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 6e0b62afe0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_1_true_true", - embedded_interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_2_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 7ded12cdd6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_2_4_false_false", - embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 9204daab66..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_4_false_false", - embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 969c9f3ee2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_2_4_false_false", - embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 4f736cd922..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_2_4_false_false", - embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 80f6cfe7bd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_4_false_false", - embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 2eed038108..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_2_4_false_false", - embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_2_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 4021ac8966..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_4_false_true", - 
embedded_interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_2_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 1e15f114fb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_4_false_true", - embedded_interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_2_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 70c8ee1e6a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_2_4_true_false", - embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
deleted file mode 100644 index 395872dd06..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_4_true_false", - 
embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 9e4ae1f7c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_2_4_true_false", - embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 6bb0142b9f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_2_4_true_false", - embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index bfea159adc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - 
"interleaved_scan_kernel_2_4_true_false", - embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index fe52b2dc8b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_2_4_true_false", - embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_2_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index a48306da96..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_4_true_true", - embedded_interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_2_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 56d0031442..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_4_true_true", - embedded_interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_2_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 2f89ecd349..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_8_false_false", - embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 15b0b99703..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_8_false_false", - embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 6011add826..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_2_8_false_false", - embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 64655ed240..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_8_false_false", - embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index aa125344d6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_8_false_false", - embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 6977c22672..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_8_false_false", - embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_2_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 3472906a7a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_8_false_true", - embedded_interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_2_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 4059e7f7aa..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_8_false_true", - embedded_interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_2_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 3ffc972145..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_8_true_false", - embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id, - 
sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index a2d8a0017c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_8_true_false", - embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 1f5e5ef8eb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_2_8_true_false", - embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 250f5ba2fb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_8_true_false", - embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index e0a21e5844..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_2_8_true_false", - embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 639863a249..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_2_8_true_false", - embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_2_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index b68da192b7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_8_true_true", - embedded_interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_2_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index deb97e906b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 2, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_2_8_true_true", - embedded_interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_2_8_true_true_h_h_l_n_inner_8_compose)); -} 
- -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 745d6066c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index a2a1aa9c6b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 891582bab8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index bce4e73467..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index e6f862dc87..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index f5b62cc4a5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu 
deleted file mode 100644 index db123f8801..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 7505c39c6f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index 5843f0b453..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 
2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 7d70eddfa7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index a746a02ff7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 607b20ffc4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_false_false", - embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 7d1cb0e9fc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_16_false_true", - embedded_interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_32_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 46666f348c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_16_false_true", - embedded_interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_32_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index c86a738598..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_16_false_true", - 
embedded_interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_32_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 73f62fdab1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_16_false_true", - embedded_interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_32_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 74ddf7f2f9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 
@@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_id)); -} - 
-#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 40488d2241..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index f4dafe3b38..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 28b7639880..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index eab5239a40..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu deleted file 
mode 100644 index 195d356657..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_true_false", - 
embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index 87d40dab4b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 1cec12a330..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index 720f30229b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 54510be430..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index cfc3f6dff6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 45eb046fea..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_16_true_false", - embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_32_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index c188fd475d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_16_true_true", - embedded_interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_32_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index c5baabba3f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_16_true_true", - embedded_interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_32_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 0f779800ba..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_16_true_true", - embedded_interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_32_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 961b4706e6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 
+0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_16_true_true", - 
embedded_interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_32_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 20ebf95311..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index e1f94ae722..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu 
deleted file mode 100644 index 49cf8aa86f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - 
embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index 4e81abbb92..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index e8b07bbaf4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 633bdf3dd4..0000000000 
--- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_32_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index faf17652c1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 9dc77b44fa..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 7eb6621c05..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index e2f0627bcb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index e5ba767b80..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index b1333e2245..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 7aa2dd5c1c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index c990505ef0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index c62d960518..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index 74db5c79c9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index e4f39d3438..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 0976c9ebd8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 
100644 index f8eac8eabe..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index e0a1c292c2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index b03f77a8a3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index a99ff9843e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 955d462abe..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 80f260c6be..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_false_false", - embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 167e921cb3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_false_true", - embedded_interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index e8a2e2225b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_false_true", - embedded_interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index fc4e2199c1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_false_true", - embedded_interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index 3949bfcf4f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_false_true", - embedded_interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index b4274a2a1f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_false_true", - embedded_interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index c048d1443c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_false_true", - embedded_interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index e369ef6a01..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_false_true", - 
embedded_interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 712a540289..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_false_true", - embedded_interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 692131e2dc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 
(c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 9a63fd7998..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index d627ec343d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index 51bcea7219..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 3992fb774c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 6a071f31a0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 9c0f05192e..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 15153f81b8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 7706ae6b11..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index f00a7d4bff..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 20280de2c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 7d23b648f8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 8b9f9f10dc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index f8e93185b0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 981b3627b4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index 33b657a4ac..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file 
mode 100644 index e48f6a7148..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_true_false", - 
embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 6b3f51dfb8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index 6b691e3f61..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 7ccc23cefa..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 0dc495da2d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 4aaff1d29e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index ae9e2ab347..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 9c42d89630..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_32_1_true_false", - embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_32_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 655488bc0e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_true_true", - embedded_interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index c7d11cff9c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_true_true", - embedded_interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 505529a024..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_true_true", - embedded_interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index 82f2ba84fd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_true_true", - embedded_interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 0f7d69328b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_true_true", - embedded_interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index b3543f0629..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_true_true", - embedded_interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 4121b3bb68..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_true_true", - 
embedded_interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index fdfce5ade6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_1_true_true", - embedded_interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_32_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 666beb4ff0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 
2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_4_false_false", - embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index d573227b4a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_4_false_false", - embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 5a8d8b9380..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_4_false_false", - embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu 
deleted file mode 100644 index ca6b2e37b4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_4_false_false", - 
embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 5326c9c85b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_4_false_false", - embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 2943f6f0e8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_4_false_false", - embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_32_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index e2a6ea97f3..0000000000 
--- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_4_false_true", - 
embedded_interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_32_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 5c66a8a258..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_4_false_true", - embedded_interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_32_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 1fac0fc6df..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_4_true_false", - embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 20e99961ad..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_4_true_false", - embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index e2ca3078b0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_32_4_true_false", - embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 50da6d130b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_4_true_false", - embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index e232731e65..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - 
"interleaved_scan_kernel_32_4_true_false", - embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 03012dcb4f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_32_4_true_false", - embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_32_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 059750251f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_4_true_true", - embedded_interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_32_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index d3c359fb87..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_4_true_true", - embedded_interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_32_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index b43f3154a5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_8_false_false", - embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index a6b5fd2548..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_8_false_false", - embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index bc69abe2ea..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_8_false_false", - embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 32ad26e987..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_8_false_false", - embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index e5eaa4f2d3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_8_false_false", - embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index fe96f555bc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_8_false_false", - embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_32_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index bbe9f9fc88..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, 
- cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_8_false_true", - embedded_interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_32_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 8b8c378154..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_8_false_true", - embedded_interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_32_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index fdff0f312f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_8_true_false", - embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id, - 
sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index ad3f5de3eb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_8_true_false", - embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index e0fbf9e783..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_32_8_true_false", - embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 380f09c160..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_8_true_false", - embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index f3178fe37f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_32_8_true_false", - embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index a27f7186bb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_32_8_true_false", - embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_32_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 91b64add4f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_8_true_true", - embedded_interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_32_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 09574ccb96..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 32, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_32_8_true_true", - embedded_interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_32_8_true_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 763b0b2b5a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 939c9709f1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 8436db548a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 921de1a4df..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 6622d3c9ca..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu deleted file 
mode 100644 index 58a437e400..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_false_false", - 
embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index ecda14812a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 97df6c86d6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index 0e46fa23c7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 4e37e49af1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 7685ed1706..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 6a97de5b3f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_false_false", - embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_4_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 695bc50e41..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_16_false_true", - embedded_interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_4_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 64b9d7fc3e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_16_false_true", - embedded_interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_4_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 76f787b727..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_16_false_true", - embedded_interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_4_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 0a2e4b2b76..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 
+0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_16_false_true", - 
embedded_interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_4_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 3ffe23abf4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index d9dc2c15df..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 4f965114be..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index facf2bc00d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu 
deleted file mode 100644 index a8c2f0879a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_16_true_false", - 
embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index 8fd79f92f4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index e02ed08720..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 40d0985180..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index ce6575eed3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 3d33a1ae90..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index b493264e1c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index ab76b4239a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_16_true_false", - embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_4_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 0696e1409b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_16_true_true", - embedded_interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_4_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 272dac4ad4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_16_true_true", - embedded_interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_4_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 53c57a5cb9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_16_true_true", - 
embedded_interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_4_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 6d1870445e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_16_true_true", - embedded_interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_4_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 2f289a8ded..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 
(c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index c2cd6e34a2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index 337d99a1c6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index 17377d2b5d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 67144e5dee..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 8a0d55edf9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index e7740674c0..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 3fda2a8d91..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index ad54b4d903..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index cb220fcc28..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index b958645eef..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 143afbc924..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 4bcb94b825..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index f35dd50b86..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 266168e29d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index ff0d8f312b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file 
mode 100644 index 1592a020e9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_false_false", - 
embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index bc0c1035f8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index e3e33db5c1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 01798fcd83..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index b2976af3ba..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 47daa2c1dc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 98cc30fe49..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 3df7b36c1c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_false_false", - embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 07e8eb3149..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_false_true", - embedded_interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 4d31877a9c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_false_true", - embedded_interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 59560e17a5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_false_true", - embedded_interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index 8458b8fd97..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_false_true", - embedded_interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 652b21f67e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_false_true", - embedded_interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index ff36ea3f1d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_false_true", - embedded_interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index a08c7548a4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_false_true", - 
embedded_interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index e6b89c53fc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_false_true", - embedded_interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index f6efe8349f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 4acfa185b9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index 21dd415863..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index b897e5700c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index c0ea7501ca..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index d68c4d351b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index f525f7a18c..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
- raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 0fc3f37dce..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index e5d57123c6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index 235142e24d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 09364c0c63..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 0b1ac5b3f8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 497421e3d7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 8de775753e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 878dc72ce2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm< - signed char, - int, - long, 
- cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>("interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index a7a15502d7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index b791076b4c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 
8d7b041099..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_4_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index fe1bf30af3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 915b48f6e6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 65b01a3598..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index a7b4e04edc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id, - 
sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 8c0a8ed1d0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index b017f6a277..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_4_1_true_false", - embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_4_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index c5ba80e29d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_true_true", - embedded_interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index d9ec96f967..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_true_true", - embedded_interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 19ac034898..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_true_true", - embedded_interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index f1090aef2c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_true_true", - embedded_interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index e5f104793d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_true_true", - embedded_interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 7a2fee4d48..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_true_true", - embedded_interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index f100539318..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_true_true", - 
embedded_interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 0144a4dcff..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_1_true_true", - embedded_interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_4_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index b63104d923..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_4_4_false_false", - embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 146d228c9c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_4_false_false", - embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 23bd49d772..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_4_4_false_false", - embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 03054e2e72..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_4_4_false_false", - embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 6c7549b2d3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_4_false_false", - embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 3da31a31e3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_4_4_false_false", - embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_4_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 2ce228fbaa..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_4_false_true", - 
embedded_interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_4_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index b286ec543d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_4_false_true", - embedded_interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_4_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 5044092464..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_4_4_true_false", - embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
deleted file mode 100644 index 116446d53c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_4_true_false", - 
embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index fc10620cf6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_4_4_true_false", - embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index e477e78b37..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_4_4_true_false", - embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 25a8b28551..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - 
"interleaved_scan_kernel_4_4_true_false", - embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 03d7eaac1f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_4_4_true_false", - embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_4_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index ccc5bcb936..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_4_true_true", - embedded_interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_4_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 3da6296356..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_4_true_true", - embedded_interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_4_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 4e074b0f24..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_8_false_false", - embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 86ac6b61db..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_8_false_false", - embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 9c1bdad603..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_4_8_false_false", - embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index e2a429172e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_8_false_false", - embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 79bd8c94a7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_8_false_false", - embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 18e744f6c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_8_false_false", - embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_4_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 6f1b7867d5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_8_false_true", - embedded_interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_4_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index fbf1e96e12..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_8_false_true", - embedded_interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_4_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 66c3139943..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_8_true_false", - embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id, - 
sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 02a624edf2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_8_true_false", - embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 3cda9a0cae..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_4_8_true_false", - embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index a7165608a2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_8_true_false", - embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 04302f2eb4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_4_8_true_false", - embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 28fcb4ed12..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_4_8_true_false", - embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_4_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index b5d5d5b43d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_8_true_true", - embedded_interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_4_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index fa9bd46c1f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 4, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_4_8_true_true", - embedded_interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_4_8_true_true_h_h_l_n_inner_8_compose)); -} 
- -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 83de96f840..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 504d51a4b2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index cf0ecf5eef..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 14b8a6deef..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 6e0fe1e1ef..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index d741bdf9a3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu 
deleted file mode 100644 index e191d36354..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index f00e593299..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index 7a73fa2364..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 
2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index d073c5bdd1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 6504458c4c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 024489ce7d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_false_false", - embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index f5c6838d2d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_16_false_true", - embedded_interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_64_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 2d437c349b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_16_false_true", - embedded_interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_64_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 6919f2f6c6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_16_false_true", - 
embedded_interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_64_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 23a2baeff0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_16_false_true", - embedded_interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_64_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 4f23f0e822..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 
@@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_id)); -} - 
-#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 3bcadc4c8e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 62c11742a1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index c2efc348f9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index b6e8b9832f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu deleted file 
mode 100644 index 6df25297da..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_true_false", - 
embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index db5813e84e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 072fa51ba4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index 2bb058c11a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index cd16f44adb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 00b518a587..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 54b90ba848..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_16_true_false", - embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_64_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 9521f519c8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_16_true_true", - embedded_interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_64_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 54f2a6816b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_16_true_true", - embedded_interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_64_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 74d84fde79..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_16_true_true", - embedded_interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_64_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 66b555bc1c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 
+0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_16_true_true", - 
embedded_interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_64_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index aca61f47d6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 37277e3580..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu 
deleted file mode 100644 index 8ade867da9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - 
embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index e10d1d4344..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 0d5e698d3f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 8830a301a6..0000000000 
--- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_64_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index aaf86661f3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index e6146ad03f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 2821e04e16..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index f405eea8a4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 8f20aee192..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index a51a2cccba..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index fa0d64ea9f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 21ae76eade..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index a65523a598..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index fe0921ebd0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index b01b7256a6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 327b7d1792..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 
100644 index c7b0d7db6d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 4b82034013..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index e0639772a6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 0d14b99b43..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index a1cb5204a5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 761b1b84f9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_false_false", - embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 2fb9304a71..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_false_true", - embedded_interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 7377259f3c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_false_true", - embedded_interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 8491937edd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_false_true", - embedded_interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index 17e39882b2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_false_true", - embedded_interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index bb13554ae8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_false_true", - embedded_interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 4586b84807..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_false_true", - embedded_interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 539cf86101..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_false_true", - 
embedded_interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 555f0a0094..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_false_true", - embedded_interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 8eae2e2295..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 
(c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index ec32b38101..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index 5bbae73f63..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index afdfd9ee5a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 6e1ae09b17..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 24052094e5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index 1d8e98431a..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index acbb5a4716..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 307982d41c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index a7d41d73fd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index fe6a60aa08..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index 32b223f46d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index c4685af733..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index c16b8b5e5b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index f821f96f29..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index a2e661a4f0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file 
mode 100644 index 60059dacd9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_true_false", - 
embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 51e0369588..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index 1bf2c3cfc7..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 1da8e0aaa0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 25bfcfe03c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index 07a8ab4fac..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 21da741531..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 3b54a48e11..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_64_1_true_false", - embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_64_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 6336391044..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_true_true", - embedded_interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 88c46b76ab..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_true_true", - embedded_interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index ba7f1c6995..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_true_true", - embedded_interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index 84c51bd68e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_true_true", - embedded_interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 8e4842998e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_true_true", - embedded_interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 5e29122033..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_true_true", - embedded_interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 8e80e9be05..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_true_true", - 
embedded_interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index 53b3dc1a87..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_1_true_true", - embedded_interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_64_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index acfd708f76..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 
2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_4_false_false", - embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index d032ddcc4e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_4_false_false", - embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 19631506de..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_4_false_false", - embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu 
deleted file mode 100644 index 3c6a2acc43..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_4_false_false", - 
embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 4bd3838e99..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_4_false_false", - embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 40c7704a1e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_4_false_false", - embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_64_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 1342bfad21..0000000000 
--- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_4_false_true", - 
embedded_interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_64_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index f9e2b8d098..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_4_false_true", - embedded_interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_64_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 8323e30c3e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_4_true_false", - embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 480c08c544..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_4_true_false", - embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 20ae98adf2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_64_4_true_false", - embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index b22a3812e1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_4_true_false", - embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 7e92ddd425..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - 
"interleaved_scan_kernel_64_4_true_false", - embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index e1cebc0fe2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_64_4_true_false", - embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_64_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 2730174bb3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_4_true_true", - embedded_interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_64_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index ee06b163d6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_4_true_true", - embedded_interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_64_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 091618ef69..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_8_false_false", - embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index 01cdac1563..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_8_false_false", - embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 0cec38f6c3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_8_false_false", - embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 78a63322f0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_8_false_false", - embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 19820c1856..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_8_false_false", - embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 9596b2b7f0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_8_false_false", - embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_64_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index ead0281476..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, 
- cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_8_false_true", - embedded_interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_64_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 408bf0abe1..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_8_false_true", - embedded_interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_64_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index f924a0e21c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_8_true_false", - embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id, - 
sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index a5b2f0a835..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_8_true_false", - embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index b616e846f0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_64_8_true_false", - embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index 5f464f40ee..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_8_true_false", - embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 9fe52ce048..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_64_8_true_false", - embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index 09e8cc46fc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_64_8_true_false", - embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_64_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 1f748a0545..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_8_true_true", - embedded_interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_64_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 8e94466e06..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 64, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_64_8_true_true", - embedded_interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_64_8_true_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 9080e4fd3d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 8924196073..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index e2f65450fc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 676dcd4c04..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index a73c9610dc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu deleted file 
mode 100644 index dcc8a50e69..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_false_false", - 
embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_false_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index af16b128c4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index e562b24c99..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index 598f0fa9ab..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index a483ca9a04..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id, - 
sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index 533cecce98..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 61321f7b85..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_false_false", - embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_8_16_false_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index ef35cab046..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_16_false_true", - embedded_interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_8_16_false_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 7ffd93cef8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_16_false_true", - embedded_interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_8_16_false_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index 053970a47c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_16_false_true", - embedded_interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_8_16_false_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 9be78447bc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 
+0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_16_false_true", - 
embedded_interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_8_16_false_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu deleted file mode 100644 index 0efbf00872..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 4c3197e321..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_euclidean_16_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu deleted file mode 100644 index 7fd65caf58..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu deleted file mode 100644 index 19c3f6d59c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu 
deleted file mode 100644 index fa8a8e4843..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_16_true_false", - 
embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu deleted file mode 100644 index 7b9a6bac5b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_sc_i_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu deleted file mode 100644 index 6d823854ae..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu deleted file mode 100644 index 448e8d9468..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu deleted file mode 100644 index 5fe822da79..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id, - 
sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_b_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu deleted file mode 100644 index 41d6a505a5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu deleted file mode 100644 index bc7400fc8c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_euclidean_16_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu deleted file mode 100644 index 6a996b1e36..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_16_true_false", - embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id, - sizeof(embedded_interleaved_scan_kernel_8_16_true_false_uc_ui_l_n_inner_16_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu deleted file mode 100644 index 9ece40adae..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_16_true_true", - embedded_interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_8_16_true_true_sc_i_l_b_inner_16_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu deleted file mode 100644 index 5f63240d6e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_16_true_true", - embedded_interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_8_16_true_true_sc_i_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu deleted file mode 100644 index a83f27c084..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_16_true_true", - 
embedded_interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_8_16_true_true_uc_ui_l_b_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu deleted file mode 100644 index 6d71c99e81..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 16, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_16_true_true", - embedded_interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose, - sizeof(embedded_interleaved_scan_kernel_8_16_true_true_uc_ui_l_n_inner_16_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 8ebec5238c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 
(c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 391fb0d291..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index b08f946298..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index 4936d3967b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index bc8be9056f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index 680dff882e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index a9f80b78ea..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, 
__half>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 43685d4b2e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index d63cf65ff3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index c7f42e5510..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 498c6bb4a2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index adb311a23e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index e01aa05f2d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 28db7f2c57..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index d4a9cee9d9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index 1e5e5aa183..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file 
mode 100644 index f4350fc264..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_false_false", - 
embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 1d68310436..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index fc5d11e4d3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index f8c2d688f3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index bb8d7ad0b5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index f0d0da2442..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index d8d8c180a8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 4c7e75e9cc..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_false_false", - embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_false_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 2417a71cbf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_false_true", - embedded_interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_false_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index b2f75be725..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_false_true", - embedded_interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_false_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index 37e001bb23..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_false_true", - embedded_interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_false_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index def28cff07..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_false_true", - embedded_interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_false_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index b002057d62..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_false_true", - embedded_interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_false_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 67897b6a25..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_false_true", - embedded_interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_false_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index 9a1c1ed047..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_false_true", - 
embedded_interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_false_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index ed923f0649..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - false, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_false_true", - embedded_interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_false_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu deleted file mode 100644 index 559f409c6b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 42749db625..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu deleted file mode 100644 index a31e838d03..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>("interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu deleted file mode 100644 index d29b19f227..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 60a55a3014..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu deleted file mode 100644 index d7705ed8d4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_f_f_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu deleted file mode 100644 index ff25a44c6d..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
- raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 3c0469c50e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu deleted file mode 100644 index 1b2cbdaf47..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_b_inner_1_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu deleted file mode 100644 index 19a217b65b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 560653b426..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu deleted file mode 100644 index fbc10678a4..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_h_h_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu deleted file mode 100644 index 39fb1fe146..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 6b0ce53866..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu deleted file mode 100644 index 863b283267..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id() -{ - registerAlgorithm< - signed char, - int, - long, 
- cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>("interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu deleted file mode 100644 index 26ebd4c55e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 2568e34515..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu deleted file mode 100644 index 
1de92668bf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_8_1_true_false_sc_i_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu deleted file mode 100644 index 065e18cf8c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu deleted file mode 100644 index 98d443a8e5..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu deleted file mode 100644 index 9fbeeb93f3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_b_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu deleted file mode 100644 index fd72f2db70..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id, - 
sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu deleted file mode 100644 index 1236530e31..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_euclidean_1_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu deleted file mode 100644 index 27daa156c9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - false, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::identity_op>( - "interleaved_scan_kernel_8_1_true_false", - embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id, - 
sizeof(embedded_interleaved_scan_kernel_8_1_true_false_uc_ui_l_n_inner_1_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu deleted file mode 100644 index 5972b2094d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_true_true", - embedded_interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_true_true_f_f_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu deleted file mode 100644 index 6f4a0007ed..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_true_true", - embedded_interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_true_true_f_f_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu deleted file mode 100644 index d3feba7c29..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_true_true", - embedded_interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_true_true_h_h_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu deleted file mode 100644 index b6a32d007f..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_true_true", - embedded_interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_true_true_h_h_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu deleted file mode 100644 index 79af215b41..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_true_true", - embedded_interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_true_true_sc_i_l_b_inner_1_compose)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu deleted file mode 100644 index 6e12d0d238..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - true, - signed char, - int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - signed char const*, - unsigned int const*, - signed char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_true_true", - embedded_interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_true_true_sc_i_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu deleted file mode 100644 index efabdf73f3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_true_true", - 
embedded_interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_true_true_uc_ui_l_b_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu deleted file mode 100644 index bf4d24eca9..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 1, - true, - true, - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - unsigned char const*, - unsigned int const*, - unsigned char const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose() -{ - registerAlgorithm< - unsigned char, - unsigned int, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_1_true_true", - embedded_interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose, - sizeof(embedded_interleaved_scan_kernel_8_1_true_true_uc_ui_l_n_inner_1_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 51a1dd6497..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, 
NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_8_4_false_false", - embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu deleted file mode 100644 index 9124cb21c6..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_4_false_false", - embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 43bd67783c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_8_4_false_false", - embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index f08df71214..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - 
"interleaved_scan_kernel_8_4_false_false", - embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index c3de94d319..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_4_false_false", - embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 5d54a32d0e..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - false, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_8_4_false_false", - embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_8_4_false_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 17ab3dc3f1..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_4_false_true", - 
embedded_interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_8_4_false_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index b070340f5b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - false, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_4_false_true", - embedded_interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_8_4_false_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu deleted file mode 100644 index 3eae120f1b..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_8_4_true_false", - embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu 
deleted file mode 100644 index f6015566c8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_4_true_false", - 
embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu deleted file mode 100644 index 2c36e5f0ef..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id() -{ - registerAlgorithm< - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>("interleaved_scan_kernel_8_4_true_false", - embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_b_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu deleted file mode 100644 index 7a2a19a65d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_8_4_true_false", - embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id, - sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu deleted file mode 100644 index 0f19eda8b2..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, - raft::sqrt_op>( - 
"interleaved_scan_kernel_8_4_true_false", - embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_euclidean_4_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu deleted file mode 100644 index 08325c1143..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - true, - false, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id() -{ - registerAlgorithm, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::identity_op>( - "interleaved_scan_kernel_8_4_true_false", - embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id, - sizeof(embedded_interleaved_scan_kernel_8_4_true_false_f_f_l_n_inner_4_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu deleted file mode 100644 index 7addbaf8d0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose() -{ - registerAlgorithm>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_4_true_true", - embedded_interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_8_4_true_true_f_f_l_b_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu deleted file mode 100644 index 05b66d8bb0..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 4, - true, - true, - float, - float, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - float const*, - unsigned int const*, - float const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose() -{ - registerAlgorithm, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_4_true_true", - embedded_interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose, - sizeof(embedded_interleaved_scan_kernel_8_4_true_true_f_f_l_n_inner_4_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index c64c2177cb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_8_false_false", - embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index b1f0c24f5c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_8_false_false", - embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt, - 
sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 20db7b00de..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_8_8_false_false", - embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index a21aa93306..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_8_false_false", - embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index cbde41dc21..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_8_false_false", - embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index e930a9d7c3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - false, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_8_false_false", - embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_8_8_false_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index 9a2dd381e3..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - 
cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_8_false_true", - embedded_interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_8_8_false_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 9f229550c8..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - false, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_8_false_true", - embedded_interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_8_8_false_true_h_h_l_n_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu deleted file mode 100644 index 599d8c09bd..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_8_true_false", - embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id, - 
sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu deleted file mode 100644 index f35185bead..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_8_true_false", - embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu deleted file mode 100644 index 940bda850c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id() -{ - registerAlgorithm< - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>("interleaved_scan_kernel_8_8_true_false", - embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_b_inner_8_id)); -} - -#endif diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu deleted file mode 100644 index e669b20128..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_8_true_false", - embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id, - sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu deleted file mode 100644 index 4af434e8ce..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, - raft::sqrt_op>( - "interleaved_scan_kernel_8_8_true_false", - embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt, - sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_euclidean_8_sqrt)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu deleted file mode 100644 index a51b29bfcb..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - true, - false, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::identity_op>( - "interleaved_scan_kernel_8_8_true_false", - embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id, - sizeof(embedded_interleaved_scan_kernel_8_8_true_false_h_h_l_n_inner_8_id)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu deleted file mode 100644 index a9816599cf..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.cu +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering:: - ivf_to_sample_filter>, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter< - long, - cuvs::neighbors::filtering::bitset_filter>, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_8_true_true", - embedded_interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_8_8_true_true_h_h_l_b_inner_8_compose)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu deleted file mode 100644 index 9978b5ad4c..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.cu +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ivf_flat_interleaved_scan.cuh" - -#ifdef BUILD_KERNEL - -template __global__ void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel< - 8, - 8, - true, - true, - __half, - __half, - long, - cuvs::neighbors::filtering::ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>, - unsigned int, - __half const*, - unsigned int const*, - __half const* const*, - unsigned int const*, - unsigned int, - unsigned int, - unsigned int, - unsigned int, - unsigned int const*, - unsigned int, - cuvs::neighbors::filtering::ivf_to_sample_filter, - unsigned int*, - float*); - -#else - -#include "interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose.h" -#include - -__attribute__((__constructor__)) static void -register_interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose() -{ - registerAlgorithm<__half, - __half, - long, - cuvs::neighbors::filtering:: - ivf_to_sample_filter, - cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, - raft::compose_op, - raft::plug_const_op>>( - "interleaved_scan_kernel_8_8_true_true", - embedded_interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose, - sizeof(embedded_interleaved_scan_kernel_8_8_true_true_h_h_l_n_inner_8_compose)); -} 
- -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp index a92ee801c7..6f65a90612 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -21,7 +21,7 @@ #include #include -std::string bool_to_string(bool b) { return b ? "true" : "false"; } +inline std::string bool_to_string(bool b) { return b ? "true" : "false"; } template struct InterleavedScanPlanner : AlgorithmPlanner { diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp new file mode 100644 index 0000000000..6fc47a1deb --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +namespace cuvs::neighbors::ivf_flat::detail { + +// Tag types for data types +struct tag_float {}; +struct tag_half {}; +struct tag_int8 {}; +struct tag_uint8 {}; + +// Tag types for accumulator types +struct tag_acc_float {}; +struct tag_acc_half {}; +struct tag_acc_int32 {}; +struct tag_acc_uint32 {}; + +// Tag types for index types +struct tag_idx_int64 {}; + +// Tag types for filter subtypes +struct tag_filter_bitset_impl {}; +struct tag_filter_none_impl {}; + +// Tag types for sample filter types with full template info +template +struct tag_filter {}; + +// Tag types for distance metrics with full template info +template +struct tag_metric_euclidean {}; + +template +struct tag_metric_inner_product {}; + +// Tag types for post-processing +struct tag_post_identity {}; +struct tag_post_sqrt {}; +struct tag_post_compose {}; + +} // namespace cuvs::neighbors::ivf_flat::detail From eb2d74b28e72c8b0d75fc35c749128900cb96a99 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sun, 5 Oct 2025 03:50:28 +0000 Subject: [PATCH 004/158] passing tests --- .gitignore | 2 +- cpp/cmake/modules/generate_interleaved_scan_kernels.cmake | 5 +++-- .../neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 198f847982..f77574c2ab 100644 --- a/.gitignore +++ b/.gitignore @@ -91,4 +91,4 @@ ivf_pq_index # jit lto kernels cpp/cmake/jit_lto_kernels_list/ -cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel_*.cu +cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels/interleaved_scan_kernel_*.cu diff --git a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake index 279bd7e7c7..39c6242764 100644 --- a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake +++ b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake @@ -43,12 +43,13 @@ 
function(generate_interleaved_scan_kernels) message(STATUS "Generating interleaved scan kernels for the first time...") execute_process( COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} - WORKING_DIRECTORY ${OUTPUT_DIR} RESULT_VARIABLE GENERATION_RESULT + OUTPUT_VARIABLE GENERATION_OUTPUT + ERROR_VARIABLE GENERATION_ERROR ) if(NOT GENERATION_RESULT EQUAL 0) - message(FATAL_ERROR "Failed to generate kernel files during configuration") + message(FATAL_ERROR "Failed to generate kernel files during configuration\nOutput: ${GENERATION_OUTPUT}\nError: ${GENERATION_ERROR}") endif() endif() diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py index 23c90a0cd7..de5839c722 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py @@ -294,8 +294,9 @@ def main(): with open(kernels_file, 'r') as f: lines = f.readlines() - # Output directory (same directory as the script) - output_dir = script_dir + # Output directory (interleaved_scan_kernels subdirectory) + output_dir = script_dir / 'interleaved_scan_kernels' + output_dir.mkdir(parents=True, exist_ok=True) # Parse all kernels and generate files generated_files = [] From d2318e8c682df7b5a96064a063d9c739aab57cc4 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 6 Oct 2025 05:41:53 +0000 Subject: [PATCH 005/158] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f77574c2ab..cc96d157f3 100644 --- a/.gitignore +++ b/.gitignore @@ -92,3 +92,4 @@ ivf_pq_index # jit lto kernels cpp/cmake/jit_lto_kernels_list/ cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels/interleaved_scan_kernel_*.cu +cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_device_functions/*.cu From 5e6afcd28884923e9a6e504432007ea653b46e2a Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 6 Oct 2025 05:45:37 +0000 
Subject: [PATCH 006/158] separate out distance function from main kernel --- cpp/CMakeLists.txt | 1 + .../generate_interleaved_scan_kernels.cmake | 17 +- .../ivf_flat/ivf_flat_interleaved_scan.cuh | 268 ++++++++---------- .../jit_lto_kernels/generate_kernels.py | 236 +++++++++++++-- .../interleaved_scan_planner.hpp | 16 ++ .../jit_lto_kernels/metric_euclidean_dist.cuh | 71 +++++ .../jit_lto_kernels/metric_inner_product.cuh | 42 +++ 7 files changed, 459 insertions(+), 192 deletions(-) create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 966c2c3349..3dc60eee5f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -573,6 +573,7 @@ if(NOT BUILD_CPU_ONLY) add_library( jit_lto_fatbins OBJECT ${INTERLEAVED_SCAN_KERNEL_FILES} + ${METRIC_DEVICE_FUNCTION_FILES} ) # Make sure the kernels are generated before we try to build them diff --git a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake index 39c6242764..6089faf676 100644 --- a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake +++ b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake @@ -56,14 +56,21 @@ function(generate_interleaved_scan_kernels) # Include the generated CMake file include(${CMAKE_LIST_FILE}) - # Prepend the source directory path to all files - set(FULL_PATH_FILES) + # Prepend the source directory path to all kernel files + set(FULL_PATH_KERNEL_FILES) foreach(kernel_file ${INTERLEAVED_SCAN_KERNEL_FILES}) - list(APPEND FULL_PATH_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${kernel_file}) + list(APPEND FULL_PATH_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${kernel_file}) endforeach() - # Return the list to parent scope - set(INTERLEAVED_SCAN_KERNEL_FILES ${FULL_PATH_FILES} PARENT_SCOPE) + # Prepend the source directory path to all metric device function 
files + set(FULL_PATH_METRIC_FILES) + foreach(metric_file ${METRIC_DEVICE_FUNCTION_FILES}) + list(APPEND FULL_PATH_METRIC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${metric_file}) + endforeach() + + # Return the lists to parent scope + set(INTERLEAVED_SCAN_KERNEL_FILES ${FULL_PATH_KERNEL_FILES} PARENT_SCOPE) + set(METRIC_DEVICE_FUNCTION_FILES ${FULL_PATH_METRIC_FILES} PARENT_SCOPE) set(INTERLEAVED_SCAN_KERNELS_STAMP ${STAMP_FILE} PARENT_SCOPE) set(INTERLEAVED_SCAN_KERNELS_TARGET generate_interleaved_scan_kernels_target PARENT_SCOPE) endfunction() diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index a8059fd381..03a11934f4 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -43,57 +43,7 @@ using namespace cuvs::spatial::knn::detail; // NOLINT constexpr int kThreadsPerBlock = 128; template -struct euclidean_dist { - __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) - { - const auto diff = x - y; - acc += diff * diff; - } -}; - -template -struct euclidean_dist { - __device__ __forceinline__ void operator()(uint32_t& acc, uint32_t x, uint32_t y) - { - if constexpr (Veclen > 1) { - const auto diff = __vabsdiffu4(x, y); - acc = raft::dp4a(diff, diff, acc); - } else { - const auto diff = __usad(x, y, 0u); - acc += diff * diff; - } - } -}; - -template -struct euclidean_dist { - __device__ __forceinline__ void operator()(int32_t& acc, int32_t x, int32_t y) - { - if constexpr (Veclen > 1) { - // Note that we enforce here that the unsigned version of dp4a is used, because the difference - // between two int8 numbers can be greater than 127 and therefore represented as a negative - // number in int8. Casting from int8 to int32 would yield incorrect results, while casting - // from uint8 to uint32 is correct. 
- const auto diff = __vabsdiffs4(x, y); - acc = raft::dp4a(diff, diff, static_cast(acc)); - } else { - const auto diff = x - y; - acc += diff * diff; - } - } -}; - -template -struct inner_prod_dist { - __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) - { - if constexpr (Veclen > 1 && (std::is_same_v || std::is_same_v)) { - acc = raft::dp4a(x, y, acc); - } else { - acc += x * y; - } - } -}; +extern __device__ void compute_dist(AccT& acc, AccT x, AccT y); // Constexpr mapping functions from actual types to tags template @@ -135,20 +85,31 @@ constexpr auto get_filter_type_tag() } } -template -constexpr auto get_metric_tag() +// template +// constexpr auto get_metric_tag() +// { +// // Get tags for T and AccT +// auto t_tag = get_data_type_tag(); +// auto acc_tag = get_acc_type_tag(); + +// // Check for euclidean_dist and return templated tag with tag types +// if constexpr (std::is_same_v>) { +// return tag_metric_euclidean{}; +// } +// // Check for inner_prod_dist and return templated tag with tag types +// if constexpr (std::is_same_v>) { +// return tag_metric_inner_product{}; +// } +// } + +template +constexpr auto get_metric_name() { - // Get tags for T and AccT - auto t_tag = get_data_type_tag(); - auto acc_tag = get_acc_type_tag(); - - // Check for euclidean_dist and return templated tag with tag types - if constexpr (std::is_same_v>) { - return tag_metric_euclidean{}; + if constexpr (std::is_same_v>) { + return "euclidean"; } - // Check for inner_prod_dist and return templated tag with tag types - if constexpr (std::is_same_v>) { - return tag_metric_inner_product{}; + if constexpr (std::is_same_v>) { + return "inner_prod"; } } @@ -224,16 +185,14 @@ __device__ inline void copy_vectorized(T* out, const T* in, uint32_t n) * @tparam AccT type of the accumulated value (an optimization for 8bit values to be loaded as 32bit * values) */ -template +template struct loadAndComputeDist { - Lambda compute_dist; AccT& dist; AccT& norm_query; AccT& 
norm_data; - __device__ __forceinline__ - loadAndComputeDist(AccT& dist, Lambda op, AccT& norm_query, AccT& norm_data) - : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + __device__ __forceinline__ loadAndComputeDist(AccT& dist, AccT& norm_query, AccT& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) { } @@ -256,7 +215,7 @@ struct loadAndComputeDist { raft::lds(queryRegs, &query_shared[shmemIndex + j * Veclen]); #pragma unroll for (int k = 0; k < Veclen; ++k) { - compute_dist(dist, queryRegs[k], encV[k]); + compute_dist(dist, queryRegs[k], encV[k]); if constexpr (ComputeNorm) { norm_query += queryRegs[k] * queryRegs[k]; norm_data += encV[k] * encV[k]; @@ -291,7 +250,7 @@ struct loadAndComputeDist { #pragma unroll for (int k = 0; k < Veclen; ++k) { T q = raft::shfl(queryReg, d + k, raft::WarpSize); - compute_dist(dist, q, encV[k]); + compute_dist(dist, q, encV[k]); if constexpr (ComputeNorm) { norm_query += q * q; norm_data += encV[k] * encV[k]; @@ -317,7 +276,7 @@ struct loadAndComputeDist { #pragma unroll for (int k = 0; k < Veclen; k++) { T q = raft::shfl(queryReg, d + k, raft::WarpSize); - compute_dist(dist, q, enc[k]); + compute_dist(dist, q, enc[k]); if constexpr (ComputeNorm) { norm_query += q * q; norm_data += enc[k] * enc[k]; @@ -328,16 +287,16 @@ struct loadAndComputeDist { }; // This handles uint8_t 8, 16 Veclens -template -struct loadAndComputeDist { - Lambda compute_dist; +template +struct loadAndComputeDist { uint32_t& dist; uint32_t& norm_query; uint32_t& norm_data; - __device__ __forceinline__ - loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) - : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, + uint32_t& norm_query, + uint32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) { } @@ -359,7 +318,7 @@ struct loadAndComputeDist(query_shared + 
shmemIndex) + j * veclen_int); #pragma unroll for (int k = 0; k < veclen_int; k++) { - compute_dist(dist, queryRegs[k], encV[k]); + compute_dist(dist, queryRegs[k], encV[k]); if constexpr (ComputeNorm) { norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); norm_data = raft::dp4a(encV[k], encV[k], norm_data); @@ -389,7 +348,7 @@ struct loadAndComputeDist(dist, q, encV[k]); if constexpr (ComputeNorm) { norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(encV[k], encV[k], norm_data); @@ -415,7 +374,7 @@ struct loadAndComputeDist(dist, q, enc[k]); if constexpr (ComputeNorm) { norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(enc[k], enc[k], norm_data); @@ -427,16 +386,16 @@ struct loadAndComputeDist -struct loadAndComputeDist { - Lambda compute_dist; +template +struct loadAndComputeDist { uint32_t& dist; uint32_t& norm_query; uint32_t& norm_data; - __device__ __forceinline__ - loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) - : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, + uint32_t& norm_query, + uint32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) { } @@ -449,7 +408,7 @@ struct loadAndComputeDist { for (int j = 0; j < kUnroll; ++j) { uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; - compute_dist(dist, queryRegs, encV); + compute_dist<4, uint8_t, uint32_t>(dist, queryRegs, encV); if constexpr (ComputeNorm) { norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); norm_data = raft::dp4a(encV, encV, norm_data); @@ -472,7 +431,7 @@ struct loadAndComputeDist { for (int j = 0; j < kUnroll; ++j) { uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist(dist, q, encV); + 
compute_dist<4, uint8_t, uint32_t>(dist, q, encV); if constexpr (ComputeNorm) { norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(encV, encV, norm_data); @@ -493,7 +452,7 @@ struct loadAndComputeDist { for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { uint32_t enc = reinterpret_cast(data)[lane_id]; uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); - compute_dist(dist, q, enc); + compute_dist<4, uint8_t, uint32_t>(dist, q, enc); if constexpr (ComputeNorm) { norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(enc, enc, norm_data); @@ -502,16 +461,16 @@ struct loadAndComputeDist { } }; -template -struct loadAndComputeDist { - Lambda compute_dist; +template +struct loadAndComputeDist { uint32_t& dist; uint32_t& norm_query; uint32_t& norm_data; - __device__ __forceinline__ - loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) - : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, + uint32_t& norm_query, + uint32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) { } @@ -524,7 +483,7 @@ struct loadAndComputeDist { for (int j = 0; j < kUnroll; ++j) { uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; - compute_dist(dist, queryRegs, encV); + compute_dist<2, uint8_t, uint32_t>(dist, queryRegs, encV); if constexpr (ComputeNorm) { norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); norm_data = raft::dp4a(encV, encV, norm_data); @@ -548,7 +507,7 @@ struct loadAndComputeDist { for (int j = 0; j < kUnroll; ++j) { uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist(dist, q, encV); + compute_dist<2, uint8_t, uint32_t>(dist, q, encV); if constexpr (ComputeNorm) 
{ norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(encV, encV, norm_data); @@ -569,7 +528,7 @@ struct loadAndComputeDist { for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { uint32_t enc = reinterpret_cast(data)[lane_id]; uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); - compute_dist(dist, q, enc); + compute_dist<2, uint8_t, uint32_t>(dist, q, enc); if constexpr (ComputeNorm) { norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(enc, enc, norm_data); @@ -578,16 +537,16 @@ struct loadAndComputeDist { } }; -template -struct loadAndComputeDist { - Lambda compute_dist; +template +struct loadAndComputeDist { uint32_t& dist; uint32_t& norm_query; uint32_t& norm_data; - __device__ __forceinline__ - loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) - : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, + uint32_t& norm_query, + uint32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) { } @@ -600,7 +559,7 @@ struct loadAndComputeDist { for (int j = 0; j < kUnroll; ++j) { uint32_t encV = data[loadIndex + j * kIndexGroupSize]; uint32_t queryRegs = query_shared[shmemIndex + j]; - compute_dist(dist, queryRegs, encV); + compute_dist<1, uint8_t, uint32_t>(dist, queryRegs, encV); if constexpr (ComputeNorm) { norm_query += queryRegs * queryRegs; norm_data += encV * encV; @@ -623,7 +582,7 @@ struct loadAndComputeDist { for (int j = 0; j < kUnroll; ++j) { uint32_t encV = data[lane_id + j * kIndexGroupSize]; uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist(dist, q, encV); + compute_dist<1, uint8_t, uint32_t>(dist, q, encV); if constexpr (ComputeNorm) { norm_query += q * q; norm_data += encV * encV; @@ -644,7 +603,7 @@ struct loadAndComputeDist { for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * 
veclen) { uint32_t enc = data[lane_id]; uint32_t q = raft::shfl(queryReg, d, raft::WarpSize); - compute_dist(dist, q, enc); + compute_dist<1, uint8_t, uint32_t>(dist, q, enc); if constexpr (ComputeNorm) { norm_query += q * q; norm_data += enc * enc; @@ -654,16 +613,16 @@ struct loadAndComputeDist { }; // This device function is for int8 veclens 4, 8 and 16 -template -struct loadAndComputeDist { - Lambda compute_dist; +template +struct loadAndComputeDist { int32_t& dist; int32_t& norm_query; int32_t& norm_data; - __device__ __forceinline__ - loadAndComputeDist(int32_t& dist, Lambda op, int32_t& norm_query, int32_t& norm_data) - : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + __device__ __forceinline__ loadAndComputeDist(int32_t& dist, + int32_t& norm_query, + int32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) { } @@ -685,7 +644,7 @@ struct loadAndComputeDist(query_shared + shmemIndex) + j * veclen_int); #pragma unroll for (int k = 0; k < veclen_int; k++) { - compute_dist(dist, queryRegs[k], encV[k]); + compute_dist(dist, queryRegs[k], encV[k]); if constexpr (ComputeNorm) { norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); norm_data = raft::dp4a(encV[k], encV[k], norm_data); @@ -717,7 +676,7 @@ struct loadAndComputeDist(dist, q, encV[k]); if constexpr (ComputeNorm) { norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(encV[k], encV[k], norm_data); @@ -739,7 +698,7 @@ struct loadAndComputeDist(dist, q, enc[k]); if constexpr (ComputeNorm) { norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(enc[k], enc[k], norm_data); @@ -749,15 +708,15 @@ struct loadAndComputeDist -struct loadAndComputeDist { - Lambda compute_dist; +template +struct loadAndComputeDist { int32_t& dist; int32_t& norm_query; int32_t& norm_data; - __device__ __forceinline__ - loadAndComputeDist(int32_t& dist, Lambda op, int32_t& norm_query, int32_t& norm_data) - : dist(dist), compute_dist(op), 
norm_query(norm_query), norm_data(norm_data) + __device__ __forceinline__ loadAndComputeDist(int32_t& dist, + int32_t& norm_query, + int32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) { } __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, @@ -769,7 +728,7 @@ struct loadAndComputeDist { for (int j = 0; j < kUnroll; ++j) { int32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; int32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; - compute_dist(dist, queryRegs, encV); + compute_dist<2, int8_t, int32_t>(dist, queryRegs, encV); if constexpr (ComputeNorm) { norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); norm_data = raft::dp4a(encV, encV, norm_data); @@ -793,7 +752,7 @@ struct loadAndComputeDist { for (int j = 0; j < kUnroll; ++j) { int32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist(dist, q, encV); + compute_dist<2, int8_t, int32_t>(dist, q, encV); if constexpr (ComputeNorm) { norm_query = raft::dp4a(queryReg, queryReg, norm_query); norm_data = raft::dp4a(encV, encV, norm_data); @@ -811,7 +770,7 @@ struct loadAndComputeDist { for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { int32_t enc = reinterpret_cast(data + lane_id * veclen)[0]; int32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); - compute_dist(dist, q, enc); + compute_dist<2, int8_t, int32_t>(dist, q, enc); if constexpr (ComputeNorm) { norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(enc, enc, norm_data); @@ -820,15 +779,15 @@ struct loadAndComputeDist { } }; -template -struct loadAndComputeDist { - Lambda compute_dist; +template +struct loadAndComputeDist { int32_t& dist; int32_t& norm_query; int32_t& norm_data; - __device__ __forceinline__ - loadAndComputeDist(int32_t& dist, Lambda op, int32_t& norm_query, int32_t& norm_data) - : dist(dist), 
compute_dist(op), norm_query(norm_query), norm_data(norm_data) + __device__ __forceinline__ loadAndComputeDist(int32_t& dist, + int32_t& norm_query, + int32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) { } @@ -839,7 +798,8 @@ struct loadAndComputeDist { { #pragma unroll for (int j = 0; j < kUnroll; ++j) { - compute_dist(dist, query_shared[shmemIndex + j], data[loadIndex + j * kIndexGroupSize]); + compute_dist<1, int8_t, int32_t>( + dist, query_shared[shmemIndex + j], data[loadIndex + j * kIndexGroupSize]); if constexpr (ComputeNorm) { norm_query += int32_t{query_shared[shmemIndex + j]} * int32_t{query_shared[shmemIndex + j]}; norm_data += int32_t{data[loadIndex + j * kIndexGroupSize]} * @@ -862,7 +822,7 @@ struct loadAndComputeDist { #pragma unroll for (int j = 0; j < kUnroll; ++j) { int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist(dist, q, data[lane_id + j * kIndexGroupSize]); + compute_dist<1, int8_t, int32_t>(dist, q, data[lane_id + j * kIndexGroupSize]); if constexpr (ComputeNorm) { norm_query += q * q; norm_data += data[lane_id + j * kIndexGroupSize] * data[lane_id + j * kIndexGroupSize]; @@ -878,7 +838,7 @@ struct loadAndComputeDist { int32_t queryReg = loadDim < dim ? 
query[loadDim] : 0; for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { int32_t q = raft::shfl(queryReg, d, raft::WarpSize); - compute_dist(dist, q, data[lane_id]); + compute_dist<1, int8_t, int32_t>(dist, q, data[lane_id]); if constexpr (ComputeNorm) { norm_query += q * q; norm_data += int32_t{data[lane_id]} * int32_t{data[lane_id]}; @@ -940,11 +900,9 @@ template RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) - interleaved_scan_kernel(Lambda compute_dist, - PostLambda post_process, + interleaved_scan_kernel(PostLambda post_process, const uint32_t query_smem_elems, const T* query, const uint32_t* coarse_index, @@ -1033,8 +991,8 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) if (valid) { // Process first shm_assisted_dim dimensions (always using shared memory) - loadAndComputeDist lc( - dist, compute_dist, norm_query, norm_dataset); + loadAndComputeDist lc( + dist, norm_query, norm_dataset); for (int pos = 0; pos < shm_assisted_dim; pos += raft::WarpSize, data += kIndexGroupSize * raft::WarpSize) { lc.runLoadShmemCompute(data, query_shared, lane_id, pos); @@ -1042,16 +1000,15 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) if (dim > query_smem_elems) { // The default path - using shfl ops - for dimensions beyond query_smem_elems - loadAndComputeDist lc( - dist, compute_dist, norm_query, norm_dataset); + loadAndComputeDist lc( + dist, norm_query, norm_dataset); for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += raft::WarpSize) { lc.runLoadShflAndCompute(data, query, pos, lane_id); } lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim); } else { // when shm_assisted_dim == full_warps_along_dim < dim - loadAndComputeDist<1, decltype(compute_dist), Veclen, T, AccT, ComputeNorm> lc( - dist, compute_dist, norm_query, norm_dataset); + loadAndComputeDist<1, Veclen, T, AccT, ComputeNorm> lc(dist, norm_query, norm_dataset); for (int pos = full_warps_along_dim; pos < dim; pos += Veclen, 
data += kIndexGroupSize * Veclen) { lc.runLoadShmemCompute(data, query_shared, lane_id, pos); @@ -1124,10 +1081,9 @@ template -void launch_kernel(Lambda lambda, - PostLambda post_process, +void launch_kernel(PostLambda post_process, const index& index, const T* queries, const uint32_t* coarse_index, @@ -1161,9 +1117,11 @@ void launch_kernel(Lambda lambda, decltype(get_acc_type_tag()), decltype(get_idx_type_tag()), decltype(get_filter_type_tag()), - decltype(get_metric_tag()), decltype(get_post_lambda_tag())>( Capacity, Veclen, Ascending, ComputeNorm); + kernel_planner.template add_metric_device_function()), + decltype(get_acc_type_tag())>( + get_metric_name(), Veclen); auto kernel_launcher = kernel_planner.get_launcher(); const int max_query_smem = 16384; @@ -1220,7 +1178,6 @@ void launch_kernel(Lambda lambda, grid_dim, block_dim, smem_size, - lambda, post_process, query_smem_elems, queries, @@ -1270,8 +1227,8 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... 
arg AccT, IdxT, IvfSampleFilterT, - euclidean_dist, - raft::identity_op>({}, {}, std::forward(args)...); + tag_metric_euclidean, + raft::identity_op>({}, std::forward(args)...); case cuvs::distance::DistanceType::L2SqrtExpanded: case cuvs::distance::DistanceType::L2SqrtUnexpanded: return launch_kernel, - raft::sqrt_op>({}, {}, std::forward(args)...); + tag_metric_euclidean, + raft::sqrt_op>({}, std::forward(args)...); case cuvs::distance::DistanceType::InnerProduct: return launch_kernel, - raft::identity_op>({}, {}, std::forward(args)...); + tag_metric_inner_product, + raft::identity_op>({}, std::forward(args)...); case cuvs::distance::DistanceType::CosineExpanded: // NB: "Ascending" is reversed because the post-processing step is done after that sort return launch_kernel>( - {}, + tag_metric_inner_product>( raft::compose_op(raft::add_const_op{1.0f}, raft::mul_const_op{-1.0f}), std::forward(args)...); // NB: update the description of `knn::ivf_flat::build` when // adding here a new metric. 
diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py index de5839c722..cfb4e8b12d 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py @@ -95,47 +95,47 @@ def get_final_op_abbreviation(op_str): def generate_filename(params): - """Generate filename from template parameters.""" - # params[0]: kBlockSize (numeric) - # params[1]: VecLen (numeric) - # params[2]: kManageLocalTopK (bool) - # params[3]: kPrecompBaseDiff (bool) + """Generate filename from template parameters (WITHOUT metric).""" + # params[0]: Capacity (numeric) + # params[1]: Veclen (numeric) + # params[2]: Ascending (bool) + # params[3]: ComputeNorm (bool) # params[4]: T (type) # params[5]: AccT (type) # params[6]: IdxT (type) # params[7]: FilterT (filter type) - # params[8]: DistanceT (distance metric) - # params[9]: FinalLambda (final operator) + # params[8]: Lambda/MetricTag (metric type - EXCLUDED from filename) + # params[9]: PostLambda (final operator) parts = [ - params[0], # kBlockSize - params[1], # VecLen - params[2], # kManageLocalTopK - params[3], # kPrecompBaseDiff + params[0], # Capacity + params[1], # Veclen + params[2], # Ascending + params[3], # ComputeNorm get_type_abbreviation(params[4]), # T get_type_abbreviation(params[5]), # AccT get_type_abbreviation(params[6]), # IdxT get_filter_abbreviation(params[7]), # FilterT - get_distance_abbreviation(params[8]), # DistanceT - get_final_op_abbreviation(params[9]) # FinalLambda + # params[8] EXCLUDED - metric + get_final_op_abbreviation(params[9]) # PostLambda ] return f"interleaved_scan_kernel_{'_'.join(parts)}.cu" def generate_register_function_name(params): - """Generate the registration function name from template parameters.""" + """Generate the registration function name from template parameters (WITHOUT metric).""" parts = [ - params[0], # kBlockSize - 
params[1], # VecLen - params[2], # kManageLocalTopK - params[3], # kPrecompBaseDiff + params[0], # Capacity + params[1], # Veclen + params[2], # Ascending + params[3], # ComputeNorm get_type_abbreviation(params[4]), # T get_type_abbreviation(params[5]), # AccT get_type_abbreviation(params[6]), # IdxT get_filter_abbreviation(params[7]), # FilterT - get_distance_abbreviation(params[8]), # DistanceT - get_final_op_abbreviation(params[9]) # FinalLambda + # params[8] EXCLUDED - metric + get_final_op_abbreviation(params[9]) # PostLambda ] return f"interleaved_scan_kernel_{'_'.join(parts)}" @@ -228,11 +228,21 @@ def generate_cuda_file_content(params): filename = generate_register_function_name(params) embedded_var_name = f"embedded_{filename}" - # Format template parameters for the template instantiation (all 10 params) - template_params = ', '.join(params) - - # Convert params 4-9 to tag types for registerAlgorithm - tag_params = [param_to_tag(i, params[i], params) for i in range(4, 10)] + # The kernel now has 9 template parameters (removed MetricTag) + # params[0-3]: Capacity, Veclen, Ascending, ComputeNorm + # params[4]: T (data type) + # params[5]: AccT (accumulator type) + # params[6]: IdxT (index type) + # params[7]: IvfSampleFilterT (filter type) + # params[8]: Lambda (metric - NOT used in template anymore) + # params[9]: PostLambda (post-processing operator) + + # Template parameters without MetricTag (params 0-7, 9) + template_params_list = params[0:8] + [params[9]] + template_params = ', '.join(template_params_list) + + # Convert params 4-7 and 9 to tag types for registerAlgorithm (NO metric tag) + tag_params = [param_to_tag(i, params[i], params) for i in [4, 5, 6, 7, 9]] register_template_params = ', '.join(tag_params) # Create the string parameter with first four params (Capacity, Veclen, Ascending, ComputeNorm) @@ -258,7 +268,11 @@ def generate_cuda_file_content(params): #include "../../ivf_flat_interleaved_scan.cuh" -template __global__ void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<{template_params}>({params[8]}, {params[9]}, unsigned int, {params[4]} const*, unsigned int const*, {params[4]} const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, {params[7]}, unsigned int*, float*); +namespace cuvs::neighbors::ivf_flat::detail {{ + +template __global__ void interleaved_scan_kernel<{template_params}>({params[9]}, unsigned int, {params[4]} const*, unsigned int const*, {params[4]} const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, {params[7]}, unsigned int*, float*); + +}} // namespace cuvs::neighbors::ivf_flat::detail #else @@ -281,6 +295,150 @@ def generate_cuda_file_content(params): return content +def generate_metric_device_function_content(metric_name, veclen, data_type, acc_type): + """Generate content for a metric device function file.""" + # Map types to their tag equivalents + # Mapping for data types (T) + data_type_to_tag = { + 'float': 'tag_float', + '__half': 'tag_half', + 'int8_t': 'tag_int8', + 'uint8_t': 'tag_uint8', + } + + # Mapping for accumulator types (AccT) + acc_type_to_tag = { + 'float': 'tag_acc_float', + '__half': 'tag_acc_half', + 'int32_t': 'tag_acc_int32', + 'uint32_t': 'tag_acc_uint32', + } + + # Get abbreviated names for filename + type_abbrev = { + 'float': 'f', + '__half': 'h', + 'int8_t': 'i8', + 'uint8_t': 'u8', + 'int32_t': 'i32', + 'uint32_t': 'u32', + } + + data_tag = data_type_to_tag.get(data_type, data_type) + acc_tag = acc_type_to_tag.get(acc_type, acc_type) + + # Determine which header to include and implementation struct based on metric + if metric_name == 'euclidean': + header_file = '../metric_euclidean_dist.cuh' + metric_impl = 'euclidean_dist' + elif metric_name == 'inner_prod': + header_file = '../metric_inner_product.cuh' + metric_impl = 'inner_prod_dist' + else: + raise ValueError(f"Unknown 
metric: {metric_name}") + + content = f"""/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef BUILD_KERNEL + +#include "{header_file}" + +namespace cuvs::neighbors::ivf_flat::detail {{ + +template __device__ void compute_dist<{veclen}, {data_type}, {acc_type}>({acc_type}&, {acc_type}, {acc_type}); + +}} // namespace cuvs::neighbors::ivf_flat::detail + +#else + +#include "{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}.h" +#include +#include "../interleaved_scan_tags.hpp" + +__attribute__((__constructor__)) static void register_{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}() +{{ +using namespace cuvs::neighbors::ivf_flat::detail; +registerAlgorithm<{data_tag}, {acc_tag}>("{metric_name}_{veclen}", + embedded_{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}, + sizeof(embedded_{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]})); +}} + +#endif +""" + return content + + +def generate_metric_device_functions(script_dir): + """Generate all metric device function files.""" + # Define all combinations we need + # Based on the kernel signatures, we have: + # - Veclen: 1, 2, 4, 8, 16 + # - Data types: float, __half, int8_t, uint8_t + # - Acc types: float (for float), __half (for __half), int32_t (for int8_t), uint32_t (for uint8_t) + # - Metrics: euclidean, inner_prod + + type_combinations = [ + ('float', 'float'), + 
('__half', '__half'), + ('int8_t', 'int32_t'), + ('uint8_t', 'uint32_t'), + ] + + veclens = [1, 2, 4, 8, 16] + metrics = ['euclidean', 'inner_prod'] + + output_dir = script_dir / 'metric_device_functions' + output_dir.mkdir(parents=True, exist_ok=True) + + generated_files = [] + + type_abbrev = { + 'float': 'f', + '__half': 'h', + 'int8_t': 'i8', + 'uint8_t': 'u8', + 'int32_t': 'i32', + 'uint32_t': 'u32', + } + + for metric in metrics: + for veclen in veclens: + for data_type, acc_type in type_combinations: + filename = f"{metric}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}.cu" + file_content = generate_metric_device_function_content(metric, veclen, data_type, acc_type) + + # Write file only if it doesn't exist or content has changed + output_file = output_dir / filename + should_write = True + if output_file.exists(): + with open(output_file, 'r') as f: + existing_content = f.read() + should_write = (existing_content != file_content) + + if should_write: + with open(output_file, 'w') as f: + f.write(file_content) + + generated_files.append(filename) + + print(f"Generated {len(generated_files)} metric device function files") + return generated_files + + def main(): # Get the script directory to find the kernels file script_dir = Path(__file__).parent.absolute() @@ -299,7 +457,8 @@ def main(): output_dir.mkdir(parents=True, exist_ok=True) # Parse all kernels and generate files - generated_files = [] + # Use a dict to deduplicate by filename (since we exclude metric from filename) + unique_kernels = {} for line_num, line in enumerate(lines, 1): line = line.strip() @@ -331,9 +490,15 @@ def main(): # Generate filename and content filename = generate_filename(params) - file_content = generate_cuda_file_content(params) - # Write file only if it doesn't exist or content has changed + # Only generate if we haven't seen this filename yet (deduplication) + if filename not in unique_kernels: + file_content = generate_cuda_file_content(params) + 
unique_kernels[filename] = file_content + + # Write all unique kernel files + generated_files = [] + for filename, file_content in unique_kernels.items(): output_file = output_dir / filename should_write = True if output_file.exists(): @@ -347,11 +512,14 @@ def main(): generated_files.append(filename) - if line_num % 100 == 0: - print(f"Generated {line_num} files...") + if len(generated_files) % 100 == 0: + print(f"Generated {len(generated_files)} files...") print(f"\nGenerated {len(generated_files)} CUDA kernel files") + # Generate metric device function files + metric_files = generate_metric_device_functions(script_dir) + # Generate CMake file with all filenames # We're generating in the source tree at: cpp/src/neighbors/ivf_flat/jit_lto_kernels/ # CMake file goes to: cpp/cmake/jit_lto_kernels_list/ @@ -365,6 +533,12 @@ def main(): cmake_content += "set(INTERLEAVED_SCAN_KERNEL_FILES\n" for filename in sorted(generated_files): cmake_content += f" src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels/{filename}\n" + cmake_content += ")\n\n" + + # Add metric device function files + cmake_content += "set(METRIC_DEVICE_FUNCTION_FILES\n" + for filename in sorted(metric_files): + cmake_content += f" src/neighbors/ivf_flat/jit_lto_kernels/metric_device_functions/{filename}\n" cmake_content += ")\n" # Only write if content has changed diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp index 6f65a90612..8d2d4b9f56 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -33,4 +34,19 @@ struct InterleavedScanPlanner : AlgorithmPlanner { { std::cout << "In the planner" << std::endl; } + + template + void add_metric_device_function(std::string metric_name, int Veclen) + { + 
auto& db = fragment_database(); + auto key = metric_name + "_" + std::to_string(Veclen); + auto params = make_fragment_key(); + std::cout << "Looking for metric fragment: " << key + "_" + params << std::endl; + auto metric_fragment = db.cache.find(key + "_" + params); + if (metric_fragment == db.cache.end()) { + std::cout << "Metric fragment not found" << std::endl; + return; + } + fragments.push_back(metric_fragment->second.get()); + } }; diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh new file mode 100644 index 0000000000..1264f27e5a --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "interleaved_scan_tags.hpp" +#include + +namespace cuvs::neighbors::ivf_flat::detail { + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) + { + const auto diff = x - y; + acc += diff * diff; + } +}; + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(uint32_t& acc, uint32_t x, uint32_t y) + { + if constexpr (Veclen > 1) { + const auto diff = __vabsdiffu4(x, y); + acc = raft::dp4a(diff, diff, acc); + } else { + const auto diff = __usad(x, y, 0u); + acc += diff * diff; + } + } +}; + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(int32_t& acc, int32_t x, int32_t y) + { + if constexpr (Veclen > 1) { + // Note that we enforce here that the unsigned version of dp4a is used, because the difference + // between two int8 numbers can be greater than 127 and therefore represented as a negative + // number in int8. Casting from int8 to int32 would yield incorrect results, while casting + // from uint8 to uint32 is correct. + const auto diff = __vabsdiffs4(x, y); + acc = raft::dp4a(diff, diff, static_cast(acc)); + } else { + const auto diff = x - y; + acc += diff * diff; + } + } +}; + +template +__device__ void compute_dist(AccT& acc, AccT x, AccT y) +{ + euclidean_dist{}(acc, x, y); +} + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh new file mode 100644 index 0000000000..a69ab2f71f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "interleaved_scan_tags.hpp" +#include + +namespace cuvs::neighbors::ivf_flat::detail { + +template +struct inner_prod_dist { + __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) + { + if constexpr (Veclen > 1 && (std::is_same_v || std::is_same_v)) { + acc = raft::dp4a(x, y, acc); + } else { + acc += x * y; + } + } +}; + +template +__device__ void compute_dist(AccT& acc, AccT x, AccT y) +{ + inner_prod_dist{}(acc, x, y); +} + +} // namespace cuvs::neighbors::ivf_flat::detail From 6eee4daa00418f6f01fb4f904b85a55cee0352e2 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 6 Oct 2025 05:55:06 +0000 Subject: [PATCH 007/158] fix deps --- .../bench_ann_cuda-129_arch-aarch64.yaml | 1 - .../bench_ann_cuda-129_arch-x86_64.yaml | 1 - .../bench_ann_cuda-130_arch-aarch64.yaml | 1 - .../bench_ann_cuda-130_arch-x86_64.yaml | 1 - conda/environments/go_cuda-129_arch-aarch64.yaml | 1 - conda/environments/go_cuda-129_arch-x86_64.yaml | 1 - conda/environments/go_cuda-130_arch-aarch64.yaml | 1 - conda/environments/go_cuda-130_arch-x86_64.yaml | 1 - .../environments/rust_cuda-129_arch-aarch64.yaml | 1 - .../environments/rust_cuda-129_arch-x86_64.yaml | 1 - .../environments/rust_cuda-130_arch-aarch64.yaml | 1 - .../environments/rust_cuda-130_arch-x86_64.yaml | 1 - conda/recipes/cuvs-bench/recipe.yaml | 3 --- conda/recipes/libcuvs/recipe.yaml | 7 ------- dependencies.yaml | 16 ++++------------ python/cuvs/pyproject.toml | 2 -- 16 files changed, 4 insertions(+), 36 deletions(-) diff --git 
a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml index 2d11678e7b..5fe174ab6a 100644 --- a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml @@ -31,7 +31,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - nccl>=2.19 diff --git a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml index 82ca29dbdd..098e1765fa 100644 --- a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml @@ -33,7 +33,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - mkl-devel=2023 diff --git a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml index 45dc071a2f..3a54fecedb 100644 --- a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml @@ -31,7 +31,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - nccl>=2.19 diff --git a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml index 70c32bf03f..95aa4d7a13 100644 --- a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml @@ -33,7 +33,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - mkl-devel=2023 diff --git a/conda/environments/go_cuda-129_arch-aarch64.yaml b/conda/environments/go_cuda-129_arch-aarch64.yaml index 
45e8f94697..b8bf557877 100644 --- a/conda/environments/go_cuda-129_arch-aarch64.yaml +++ b/conda/environments/go_cuda-129_arch-aarch64.yaml @@ -25,7 +25,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git a/conda/environments/go_cuda-129_arch-x86_64.yaml b/conda/environments/go_cuda-129_arch-x86_64.yaml index ce137edfce..adc12d644b 100644 --- a/conda/environments/go_cuda-129_arch-x86_64.yaml +++ b/conda/environments/go_cuda-129_arch-x86_64.yaml @@ -25,7 +25,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git a/conda/environments/go_cuda-130_arch-aarch64.yaml b/conda/environments/go_cuda-130_arch-aarch64.yaml index c6fd84a0d3..ca450a317c 100644 --- a/conda/environments/go_cuda-130_arch-aarch64.yaml +++ b/conda/environments/go_cuda-130_arch-aarch64.yaml @@ -25,7 +25,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git a/conda/environments/go_cuda-130_arch-x86_64.yaml b/conda/environments/go_cuda-130_arch-x86_64.yaml index 90bf0dc636..5873836633 100644 --- a/conda/environments/go_cuda-130_arch-x86_64.yaml +++ b/conda/environments/go_cuda-130_arch-x86_64.yaml @@ -25,7 +25,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-129_arch-aarch64.yaml b/conda/environments/rust_cuda-129_arch-aarch64.yaml index 216ea42da4..28d7701d68 100644 --- a/conda/environments/rust_cuda-129_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-129_arch-aarch64.yaml @@ -22,7 +22,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - 
nccl>=2.19 diff --git a/conda/environments/rust_cuda-129_arch-x86_64.yaml b/conda/environments/rust_cuda-129_arch-x86_64.yaml index e9b8726a47..a21932185b 100644 --- a/conda/environments/rust_cuda-129_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-129_arch-x86_64.yaml @@ -22,7 +22,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/environments/rust_cuda-130_arch-aarch64.yaml b/conda/environments/rust_cuda-130_arch-aarch64.yaml index 1c3317bbb6..7533f45e23 100644 --- a/conda/environments/rust_cuda-130_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-130_arch-aarch64.yaml @@ -22,7 +22,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/environments/rust_cuda-130_arch-x86_64.yaml b/conda/environments/rust_cuda-130_arch-x86_64.yaml index bb2d413b94..0b4dbd7b09 100644 --- a/conda/environments/rust_cuda-130_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-130_arch-x86_64.yaml @@ -22,7 +22,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/recipes/cuvs-bench/recipe.yaml b/conda/recipes/cuvs-bench/recipe.yaml index 2e0e750701..fe12bea9ae 100644 --- a/conda/recipes/cuvs-bench/recipe.yaml +++ b/conda/recipes/cuvs-bench/recipe.yaml @@ -30,7 +30,6 @@ requirements: - cuda-version =${{ cuda_version }} - libcuvs =${{ version }} - libcuvs-bench-ann =${{ version }} - - libnvjitlink-dev - python =${{ py_version }} - pip - rapids-build-backend >=0.4.0,<0.5.0.dev0 @@ -47,7 +46,6 @@ requirements: - h5py ${{ h5py_version }} - libcublas - libcuvs-bench-ann =${{ version }} - - libnvjitlink-dev - matplotlib-base>=3.9 - pandas - pylibraft =${{ minor_version }} @@ -67,7 +65,6 @@ requirements: - libaio - libboost - libcublas 
- - libnvjitlink-dev - mkl tests: diff --git a/conda/recipes/libcuvs/recipe.yaml b/conda/recipes/libcuvs/recipe.yaml index 272e44db27..340533f092 100644 --- a/conda/recipes/libcuvs/recipe.yaml +++ b/conda/recipes/libcuvs/recipe.yaml @@ -153,7 +153,6 @@ outputs: - cmake ${{ cmake_version }} - ${{ stdlib("c") }} host: - - libnvjitlink-dev - librmm =${{ minor_version }} - libraft-headers =${{ minor_version }} - nccl ${{ nccl_version }} @@ -169,7 +168,6 @@ outputs: run: - ${{ pin_subpackage("libcuvs", exact=True) }} - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - - libnvjitlink-dev - libraft-headers =${{ minor_version }} - nccl - cuda-cudart @@ -188,7 +186,6 @@ outputs: - libcusolver - libcusparse - libcuda.so.1 - - libnvjitlink-dev - librmm - mkl - nccl @@ -247,7 +244,6 @@ outputs: host: - ${{ pin_subpackage("libcuvs", exact=True) }} - cuda-version =${{ cuda_version }} - - libnvjitlink-dev - libraft-headers =${{ minor_version }} - librmm =${{ minor_version }} - nccl ${{ nccl_version }} @@ -317,7 +313,6 @@ outputs: - libcusolver-dev - libcusparse-dev - libgomp - - libnvjitlink-dev - libraft-headers =${{ minor_version }} - librmm =${{ minor_version }} - nccl ${{ nccl_version }} @@ -335,7 +330,6 @@ outputs: - libcurand - libcusolver - libcusparse - - libnvjitlink-dev - libraft-headers =${{ minor_version }} - nccl - if: linux64 @@ -348,7 +342,6 @@ outputs: - libcurand - libcusolver - libcusparse - - libnvjitlink-dev - librmm - mkl - nccl diff --git a/dependencies.yaml b/dependencies.yaml index b44828fe54..623dd6b434 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -16,7 +16,7 @@ files: - cuda_version - depends_on_cuda_python - depends_on_cupy - - depends_on_libnvjitlink-dev + - depends_on_libnvjitlink_dev - depends_on_librmm - depends_on_pylibraft - depends_on_nccl @@ -43,7 +43,6 @@ files: - cuda_version - depends_on_cuda_python - depends_on_cupy - - depends_on_libnvjitlink-dev - depends_on_pylibraft - depends_on_libcuvs - 
depends_on_librmm @@ -57,7 +56,6 @@ files: - test_libcuvs - depends_on_libcuvs - depends_on_libcuvs_tests - - depends_on_libnvjitlink-dev test_python: output: none includes: @@ -69,7 +67,6 @@ files: - depends_on_libcuvs - depends_on_cuvs - depends_on_cuvs_bench - - depends_on_libnvjitlink-dev checks: output: none includes: @@ -101,7 +98,6 @@ files: - rapids_build - rust - depends_on_libcuvs - - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_nccl go: @@ -116,7 +112,6 @@ files: - rapids_build - go - depends_on_libcuvs - - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_nccl java: @@ -125,7 +120,6 @@ files: - cuda - cuda_version - depends_on_libcuvs - - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_nccl - java @@ -144,7 +138,7 @@ files: table: tool.rapids-build-backend key: requires includes: - - depends_on_libnvjitlink-dev + - depends_on_libnvjitlink_dev - depends_on_libraft - depends_on_librmm - depends_on_nccl @@ -156,7 +150,7 @@ files: table: project includes: - cuda_wheels - - depends_on_libnvjitlink-dev + - depends_on_libnvjitlink_dev - depends_on_libraft - depends_on_librmm - depends_on_nccl @@ -178,7 +172,6 @@ files: - build_py_cuvs - depends_on_cuda_python - depends_on_libcuvs - - depends_on_libnvjitlink-dev - depends_on_libraft - depends_on_librmm - rapids_build @@ -189,7 +182,6 @@ files: table: project includes: - depends_on_cuda_python - - depends_on_libnvjitlink-dev - depends_on_libcuvs - depends_on_pylibraft - run_py_cuvs @@ -548,7 +540,7 @@ dependencies: - output_types: conda packages: - libcuvs-tests==25.12.*,>=0.0.0a0 - depends_on_libnvjitlink-dev: + depends_on_libnvjitlink_dev: common: - output_types: conda packages: diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 2c12a12a07..98c097fc2e 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -33,7 +33,6 @@ requires-python = ">=3.10" dependencies = [ "cuda-python>=13.0.1,<14.0a0", "libcuvs==25.12.*,>=0.0.0a0", 
- "libnvjitlink-dev", "numpy>=1.23,<3.0a0", "pylibraft==25.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. @@ -121,7 +120,6 @@ requires = [ "cuda-python>=13.0.1,<14.0a0", "cython>=3.0.0", "libcuvs==25.12.*,>=0.0.0a0", - "libnvjitlink-dev", "libraft==25.12.*,>=0.0.0a0", "librmm==25.12.*,>=0.0.0a0", "ninja", From 1de8f28eb8d4aceabb504770d866d6b02f96e925 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Oct 2025 00:00:31 +0000 Subject: [PATCH 008/158] add filters as jit device functions, rework caching logic --- .gitignore | 1 + cpp/CMakeLists.txt | 1 + .../generate_interleaved_scan_kernels.cmake | 7 + .../cuvs/detail/jit_lto/AlgorithmPlanner.h | 10 +- .../cuvs/detail/jit_lto/FragmentDatabase.h | 4 +- cpp/include/cuvs/neighbors/common.hpp | 3 +- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 52 +- cpp/src/detail/jit_lto/FragmentDatabase.cu | 18 + .../ivf_flat/ivf_flat_interleaved_scan.cuh | 1048 ++--------------- .../ivf_flat_interleaved_scan_kernel.cuh | 980 +++++++++++++++ .../jit_lto_kernels/filter_bitset.cuh | 41 + .../ivf_flat/jit_lto_kernels/filter_none.cuh | 35 + .../jit_lto_kernels/generate_kernels.py | 122 +- .../interleaved_scan_planner.hpp | 15 +- cpp/src/neighbors/sample_filter.cuh | 4 +- 15 files changed, 1318 insertions(+), 1023 deletions(-) create mode 100644 cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh diff --git a/.gitignore b/.gitignore index cc96d157f3..bea7afa8e0 100644 --- a/.gitignore +++ b/.gitignore @@ -93,3 +93,4 @@ ivf_pq_index cpp/cmake/jit_lto_kernels_list/ cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels/interleaved_scan_kernel_*.cu cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_device_functions/*.cu 
+cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_device_functions/*.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3dc60eee5f..dbeb2f3a50 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -574,6 +574,7 @@ if(NOT BUILD_CPU_ONLY) jit_lto_fatbins OBJECT ${INTERLEAVED_SCAN_KERNEL_FILES} ${METRIC_DEVICE_FUNCTION_FILES} + ${FILTER_DEVICE_FUNCTION_FILES} ) # Make sure the kernels are generated before we try to build them diff --git a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake index 6089faf676..7fd82f8ac7 100644 --- a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake +++ b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake @@ -68,9 +68,16 @@ function(generate_interleaved_scan_kernels) list(APPEND FULL_PATH_METRIC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${metric_file}) endforeach() + # Prepend the source directory path to all filter device function files + set(FULL_PATH_FILTER_FILES) + foreach(filter_file ${FILTER_DEVICE_FUNCTION_FILES}) + list(APPEND FULL_PATH_FILTER_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${filter_file}) + endforeach() + # Return the lists to parent scope set(INTERLEAVED_SCAN_KERNEL_FILES ${FULL_PATH_KERNEL_FILES} PARENT_SCOPE) set(METRIC_DEVICE_FUNCTION_FILES ${FULL_PATH_METRIC_FILES} PARENT_SCOPE) + set(FILTER_DEVICE_FUNCTION_FILES ${FULL_PATH_FILTER_FILES} PARENT_SCOPE) set(INTERLEAVED_SCAN_KERNELS_STAMP ${STAMP_FILE} PARENT_SCOPE) set(INTERLEAVED_SCAN_KERNELS_TARGET generate_interleaved_scan_kernels_target PARENT_SCOPE) endfunction() diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h index 83e8e86adf..00c5516b45 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h @@ -24,15 +24,17 @@ struct FragmentEntry; struct AlgorithmPlanner { - AlgorithmPlanner(std::string const& n, std::string const& p) : name(n), params(p) {} 
+ AlgorithmPlanner(std::string const& n, std::string const& p) : entrypoint(n + "_" + p) {} AlgorithmLauncher get_launcher(); - std::string name; - std::string params; + std::string entrypoint; + std::vector device_functions; std::vector fragments; private: - void save_compute(); + void add_entrypoint(); + void add_device_functions(); + std::string get_device_functions_key(); AlgorithmLauncher build(); }; diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h index 297fbf662d..5a46c92a4e 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h @@ -39,7 +39,7 @@ class FragmentDatabase { FragmentDatabase& operator=(FragmentDatabase&&) = delete; FragmentDatabase& operator=(FragmentDatabase const&) = delete; - std::unordered_map> cache; + FragmentEntry* get_fragment(std::string const& key); private: FragmentDatabase(); @@ -52,6 +52,8 @@ class FragmentDatabase { std::string const& params, unsigned char const* blob, std::size_t size); + + std::unordered_map> cache; }; FragmentDatabase& fragment_database(); diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 4ed79dd695..5e05fe7ad2 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -522,7 +522,8 @@ struct ivf_to_sample_filter : public base_filter { const index_t* const* inds_ptrs_; const filter_t next_filter_; - ivf_to_sample_filter(const index_t* const* inds_ptrs, const filter_t next_filter); + _RAFT_HOST_DEVICE ivf_to_sample_filter(const index_t* const* inds_ptrs, + const filter_t next_filter); /** \cond */ /** If the original filter takes three arguments, then don't modify the arguments. 
diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 6ab3a528c8..44b6914112 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -47,39 +47,45 @@ void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) } } // namespace -void AlgorithmPlanner::save_compute() +void AlgorithmPlanner::add_entrypoint() { - std::cout << "Saving compute" << std::endl; - auto& db = fragment_database(); - std::cout << "DB size: " << db.cache.size() << std::endl; - std::cout << "Available keys in cache:" << std::endl; - for (const auto& pair : db.cache) { - std::cout << " " << pair.first << std::endl; - } - std::cout << "Finding key: " << this->name + "_" + this->params << std::endl; - auto val = db.cache.find(this->name + "_" + this->params); - if (val == db.cache.end()) { - std::cout << "Key not found" << std::endl; - return; - } - this->fragments.push_back(val->second.get()); + auto entrypoint_fragment = fragment_database().get_fragment(this->entrypoint); + this->fragments.push_back(entrypoint_fragment); std::cout << "Fragment added with key: " << fragments.back()->compute_key << std::endl; std::cout << "Fragments size: " << this->fragments.size() << std::endl; } +void AlgorithmPlanner::add_device_functions() +{ + for (const auto& device_function_key : this->device_functions) { + auto device_function_fragment = fragment_database().get_fragment(device_function_key); + this->fragments.push_back(device_function_fragment); + std::cout << "Fragment added with key: " << fragments.back()->compute_key << std::endl; + std::cout << "Fragments size: " << this->fragments.size() << std::endl; + } +} + +std::string AlgorithmPlanner::get_device_functions_key() +{ + std::string key = ""; + for (const auto& device_function : this->device_functions) { + key += "_" + device_function; + } + return key; +} + AlgorithmLauncher AlgorithmPlanner::get_launcher() { std::cout << "Getting 
launcher" << std::endl; auto& launchers = get_cached_launchers(); - auto key = this->name + "_" + this->params; - if (launchers.count(key) == 0) { - this->save_compute(); - launchers[key] = this->build(); + auto launch_key = this->entrypoint + this->get_device_functions_key(); + if (launchers.count(launch_key) == 0) { + add_entrypoint(); + add_device_functions(); + launchers[launch_key] = this->build(); } - std::cout << "launcher key: " << key << std::endl; - return launchers[key]; - // this->save_compute(); - // return this->build(); + std::cout << "launcher key: " << launch_key << std::endl; + return launchers[launch_key]; } AlgorithmLauncher AlgorithmPlanner::build() diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index 3c8fee591c..0bdc7cdc88 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -37,6 +37,24 @@ FragmentDatabase& fragment_database() return database; } +FragmentEntry* FragmentDatabase::get_fragment(std::string const& key) +{ + std::cout << "Saving compute" << std::endl; + auto& db = fragment_database(); + std::cout << "DB size: " << db.cache.size() << std::endl; + std::cout << "Available keys in cache:" << std::endl; + for (const auto& pair : db.cache) { + std::cout << " " << pair.first << std::endl; + } + std::cout << "Finding key: " << key << std::endl; + auto val = db.cache.find(key); + if (val == db.cache.end()) { + std::cout << "Key not found" << std::endl; + return nullptr; + } + return val->second.get(); +} + void registerFatbinFragment(std::string const& algo, std::string const& params, unsigned char const* blob, diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index 03a11934f4..9ddadb3ecf 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -17,9 +17,9 @@ #pragma once 
#include "../ivf_common.cuh" -#include "../sample_filter.cuh" #include "jit_lto_kernels/interleaved_scan_planner.hpp" #include "jit_lto_kernels/interleaved_scan_tags.hpp" +#include #include #include @@ -27,23 +27,16 @@ #include #include #include -#include #include // RAFT_CUDA_TRY -#include -#include #include -#include #include namespace cuvs::neighbors::ivf_flat::detail { -using namespace cuvs::spatial::knn::detail; // NOLINT - -constexpr int kThreadsPerBlock = 128; +static constexpr int kThreadsPerBlock = 128; -template -extern __device__ void compute_dist(AccT& acc, AccT x, AccT y); +using namespace cuvs::spatial::knn::detail; // NOLINT // Constexpr mapping functions from actual types to tags template @@ -76,11 +69,10 @@ constexpr auto get_filter_type_tag() using namespace cuvs::neighbors::filtering; // Determine the filter implementation tag - if constexpr (std::is_same_v>) { + if constexpr (std::is_same_v) { return tag_filter{}; } - if constexpr (std::is_same_v>>) { + if constexpr (std::is_same_v>) { return tag_filter{}; } } @@ -113,6 +105,19 @@ constexpr auto get_metric_name() } } +template +constexpr auto get_filter_name() +{ + if constexpr (std::is_same_v>) { + return "filter_none"; + } + if constexpr (std::is_same_v>) { + return "filter_bitset"; + } +} + template constexpr auto get_post_lambda_tag() { @@ -126,931 +131,6 @@ constexpr auto get_post_lambda_tag() } } -/** - * @brief Copy `n` elements per block from one place to another. 
- * - * @param[out] out target pointer (unique per block) - * @param[in] in source pointer - * @param n number of elements to copy - */ -template -__device__ inline void copy_vectorized(T* out, const T* in, uint32_t n) -{ - constexpr int VecElems = VecBytes / sizeof(T); // NOLINT - using align_bytes = raft::Pow2<(size_t)VecBytes>; - if constexpr (VecElems > 1) { - using align_elems = raft::Pow2; - if (!align_bytes::areSameAlignOffsets(out, in)) { - return copy_vectorized<(VecBytes >> 1), T>(out, in, n); - } - { // process unaligned head - uint32_t head = align_bytes::roundUp(in) - in; - if (head > 0) { - copy_vectorized(out, in, head); - n -= head; - in += head; - out += head; - } - } - { // process main part vectorized - using vec_t = typename raft::IOType::Type; - copy_vectorized( - reinterpret_cast(out), reinterpret_cast(in), align_elems::div(n)); - } - { // process unaligned tail - uint32_t tail = align_elems::mod(n); - if (tail > 0) { - n -= tail; - copy_vectorized(out + n, in + n, tail); - } - } - } - if constexpr (VecElems <= 1) { - for (int i = threadIdx.x; i < n; i += blockDim.x) { - out[i] = in[i]; - } - } -} - -/** - * @brief Load a part of a vector from the index and from query, compute the (part of the) distance - * between them, and aggregate it using the provided Lambda; one structure per thread, per query, - * and per index item. 
- * - * @tparam kUnroll elements per loop (normally, kUnroll = WarpSize / Veclen) - * @tparam Lambda computing the part of the distance for one dimension and aggregating it: - * void (AccT& acc, AccT x, AccT y) - * @tparam Veclen size of the vectorized load - * @tparam T type of the data in the query and the index - * @tparam AccT type of the accumulated value (an optimization for 8bit values to be loaded as 32bit - * values) - */ -template -struct loadAndComputeDist { - AccT& dist; - AccT& norm_query; - AccT& norm_data; - - __device__ __forceinline__ loadAndComputeDist(AccT& dist, AccT& norm_query, AccT& norm_data) - : dist(dist), norm_query(norm_query), norm_data(norm_data) - { - } - - /** - * Load parts of vectors from the index and query and accumulates the partial distance. - * This version assumes the query is stored in shared memory. - * Every thread here processes exactly kUnroll * Veclen elements independently of others. - */ - template - __device__ __forceinline__ void runLoadShmemCompute(const T* const& data, - const T* query_shared, - IdxT loadIndex, - IdxT shmemIndex) - { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - T encV[Veclen]; - raft::ldg(encV, data + (loadIndex + j * kIndexGroupSize) * Veclen); - T queryRegs[Veclen]; - raft::lds(queryRegs, &query_shared[shmemIndex + j * Veclen]); -#pragma unroll - for (int k = 0; k < Veclen; ++k) { - compute_dist(dist, queryRegs[k], encV[k]); - if constexpr (ComputeNorm) { - norm_query += queryRegs[k] * queryRegs[k]; - norm_data += encV[k] * encV[k]; - } - } - } - } - - /** - * Load parts of vectors from the index and query and accumulates the partial distance. - * This version assumes the query is stored in the global memory and is different for every - * thread. One warp loads exactly WarpSize query elements at once and then reshuffles them into - * corresponding threads (`WarpSize / (kUnroll * Veclen)` elements per thread at once). 
- */ - template - __device__ __forceinline__ void runLoadShflAndCompute(const T*& data, - const T* query, - IdxT baseLoadIndex, - const int lane_id) - { - T queryReg = query[baseLoadIndex + lane_id]; - constexpr int stride = kUnroll * Veclen; - constexpr int totalIter = raft::WarpSize / stride; - constexpr int gmemStride = stride * kIndexGroupSize; -#pragma unroll - for (int i = 0; i < totalIter; ++i, data += gmemStride) { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - T encV[Veclen]; - raft::ldg(encV, data + (lane_id + j * kIndexGroupSize) * Veclen); - const int d = (i * kUnroll + j) * Veclen; -#pragma unroll - for (int k = 0; k < Veclen; ++k) { - T q = raft::shfl(queryReg, d + k, raft::WarpSize); - compute_dist(dist, q, encV[k]); - if constexpr (ComputeNorm) { - norm_query += q * q; - norm_data += encV[k] * encV[k]; - } - } - } - } - } - - /** - * Load parts of vectors from the index and query and accumulates the partial distance. - * This version augments `runLoadShflAndCompute` when `dim` is not a multiple of `WarpSize`. - */ - __device__ __forceinline__ void runLoadShflAndComputeRemainder( - const T*& data, const T* query, const int lane_id, const int dim, const int dimBlocks) - { - const int loadDim = dimBlocks + lane_id; - T queryReg = loadDim < dim ? 
query[loadDim] : T{0}; - const int loadDataIdx = lane_id * Veclen; - for (int d = 0; d < dim - dimBlocks; d += Veclen, data += kIndexGroupSize * Veclen) { - T enc[Veclen]; - raft::ldg(enc, data + loadDataIdx); -#pragma unroll - for (int k = 0; k < Veclen; k++) { - T q = raft::shfl(queryReg, d + k, raft::WarpSize); - compute_dist(dist, q, enc[k]); - if constexpr (ComputeNorm) { - norm_query += q * q; - norm_data += enc[k] * enc[k]; - } - } - } - } -}; - -// This handles uint8_t 8, 16 Veclens -template -struct loadAndComputeDist { - uint32_t& dist; - uint32_t& norm_query; - uint32_t& norm_data; - - __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, - uint32_t& norm_query, - uint32_t& norm_data) - : dist(dist), norm_query(norm_query), norm_data(norm_data) - { - } - - __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, - const uint8_t* query_shared, - int loadIndex, - int shmemIndex) - { - constexpr int veclen_int = uint8_veclen / 4; // converting uint8_t veclens to int - loadIndex = loadIndex * veclen_int; -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - uint32_t encV[veclen_int]; - raft::ldg( - encV, - reinterpret_cast(data) + loadIndex + j * kIndexGroupSize * veclen_int); - uint32_t queryRegs[veclen_int]; - raft::lds(queryRegs, - reinterpret_cast(query_shared + shmemIndex) + j * veclen_int); -#pragma unroll - for (int k = 0; k < veclen_int; k++) { - compute_dist(dist, queryRegs[k], encV[k]); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); - norm_data = raft::dp4a(encV[k], encV[k], norm_data); - } - } - } - } - __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, - const uint8_t* query, - int baseLoadIndex, - const int lane_id) - { - constexpr int veclen_int = uint8_veclen / 4; // converting uint8_t veclens to int - uint32_t queryReg = - (lane_id < 8) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; - constexpr int stride = kUnroll * uint8_veclen; - -#pragma unroll - for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - uint32_t encV[veclen_int]; - raft::ldg( - encV, - reinterpret_cast(data) + (lane_id + j * kIndexGroupSize) * veclen_int); - const int d = (i * kUnroll + j) * veclen_int; -#pragma unroll - for (int k = 0; k < veclen_int; ++k) { - uint32_t q = raft::shfl(queryReg, d + k, raft::WarpSize); - compute_dist(dist, q, encV[k]); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(q, q, norm_query); - norm_data = raft::dp4a(encV[k], encV[k], norm_data); - } - } - } - } - } - - __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, - const uint8_t* query, - const int lane_id, - const int dim, - const int dimBlocks) - { - constexpr int veclen_int = uint8_veclen / 4; - const int loadDim = dimBlocks + lane_id * 4; // Here 4 is for 1 - int - uint32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; - for (int d = 0; d < dim - dimBlocks; - d += uint8_veclen, data += kIndexGroupSize * uint8_veclen) { - uint32_t enc[veclen_int]; - raft::ldg(enc, reinterpret_cast(data) + lane_id * veclen_int); -#pragma unroll - for (int k = 0; k < veclen_int; k++) { - uint32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); - compute_dist(dist, q, enc[k]); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(q, q, norm_query); - norm_data = raft::dp4a(enc[k], enc[k], norm_data); - } - } - } - } -}; - -// Keep this specialized uint8 Veclen = 4, because compiler is generating suboptimal code while -// using above common template of int2/int4 -template -struct loadAndComputeDist { - uint32_t& dist; - uint32_t& norm_query; - uint32_t& norm_data; - - __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, - uint32_t& norm_query, - uint32_t& norm_data) - : dist(dist), norm_query(norm_query), norm_data(norm_data) - { - } - - __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, - const uint8_t* query_shared, - int loadIndex, - int shmemIndex) - { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; - uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; - compute_dist<4, uint8_t, uint32_t>(dist, queryRegs, encV); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); - norm_data = raft::dp4a(encV, encV, norm_data); - } - } - } - __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, - const uint8_t* query, - int baseLoadIndex, - const int lane_id) - { - uint32_t queryReg = - (lane_id < 8) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; - constexpr int veclen = 4; - constexpr int stride = kUnroll * veclen; - -#pragma unroll - for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; - uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist<4, uint8_t, uint32_t>(dist, q, encV); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(q, q, norm_query); - norm_data = raft::dp4a(encV, encV, norm_data); - } - } - } - } - - __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, - const uint8_t* query, - const int lane_id, - const int dim, - const int dimBlocks) - { - constexpr int veclen = 4; - const int loadDim = dimBlocks + lane_id; - uint32_t queryReg = loadDim < dim ? reinterpret_cast(query)[loadDim] : 0; - for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { - uint32_t enc = reinterpret_cast(data)[lane_id]; - uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); - compute_dist<4, uint8_t, uint32_t>(dist, q, enc); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(q, q, norm_query); - norm_data = raft::dp4a(enc, enc, norm_data); - } - } - } -}; - -template -struct loadAndComputeDist { - uint32_t& dist; - uint32_t& norm_query; - uint32_t& norm_data; - - __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, - uint32_t& norm_query, - uint32_t& norm_data) - : dist(dist), norm_query(norm_query), norm_data(norm_data) - { - } - - __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, - const uint8_t* query_shared, - int loadIndex, - int shmemIndex) - { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; - uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; - 
compute_dist<2, uint8_t, uint32_t>(dist, queryRegs, encV); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); - norm_data = raft::dp4a(encV, encV, norm_data); - } - } - } - - __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, - const uint8_t* query, - int baseLoadIndex, - const int lane_id) - { - uint32_t queryReg = - (lane_id < 16) ? reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; - constexpr int veclen = 2; - constexpr int stride = kUnroll * veclen; - -#pragma unroll - for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; - uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist<2, uint8_t, uint32_t>(dist, q, encV); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(q, q, norm_query); - norm_data = raft::dp4a(encV, encV, norm_data); - } - } - } - } - - __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, - const uint8_t* query, - const int lane_id, - const int dim, - const int dimBlocks) - { - constexpr int veclen = 2; - int loadDim = dimBlocks + lane_id * veclen; - uint32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; - for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { - uint32_t enc = reinterpret_cast(data)[lane_id]; - uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); - compute_dist<2, uint8_t, uint32_t>(dist, q, enc); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(q, q, norm_query); - norm_data = raft::dp4a(enc, enc, norm_data); - } - } - } -}; - -template -struct loadAndComputeDist { - uint32_t& dist; - uint32_t& norm_query; - uint32_t& norm_data; - - __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, - uint32_t& norm_query, - uint32_t& norm_data) - : dist(dist), norm_query(norm_query), norm_data(norm_data) - { - } - - __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, - const uint8_t* query_shared, - int loadIndex, - int shmemIndex) - { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - uint32_t encV = data[loadIndex + j * kIndexGroupSize]; - uint32_t queryRegs = query_shared[shmemIndex + j]; - compute_dist<1, uint8_t, uint32_t>(dist, queryRegs, encV); - if constexpr (ComputeNorm) { - norm_query += queryRegs * queryRegs; - norm_data += encV * encV; - } - } - } - - __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, - const uint8_t* query, - int baseLoadIndex, - const int lane_id) - { - uint32_t queryReg = query[baseLoadIndex + lane_id]; - constexpr int veclen = 1; - constexpr int stride = kUnroll * veclen; - -#pragma unroll - for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - uint32_t encV = data[lane_id + j * kIndexGroupSize]; - uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist<1, uint8_t, uint32_t>(dist, q, encV); - if constexpr (ComputeNorm) { - norm_query += q * q; - norm_data += encV * encV; - } - } - } - } - - __device__ __forceinline__ void runLoadShflAndComputeRemainder(const 
uint8_t*& data, - const uint8_t* query, - const int lane_id, - const int dim, - const int dimBlocks) - { - constexpr int veclen = 1; - int loadDim = dimBlocks + lane_id; - uint32_t queryReg = loadDim < dim ? query[loadDim] : 0; - for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { - uint32_t enc = data[lane_id]; - uint32_t q = raft::shfl(queryReg, d, raft::WarpSize); - compute_dist<1, uint8_t, uint32_t>(dist, q, enc); - if constexpr (ComputeNorm) { - norm_query += q * q; - norm_data += enc * enc; - } - } - } -}; - -// This device function is for int8 veclens 4, 8 and 16 -template -struct loadAndComputeDist { - int32_t& dist; - int32_t& norm_query; - int32_t& norm_data; - - __device__ __forceinline__ loadAndComputeDist(int32_t& dist, - int32_t& norm_query, - int32_t& norm_data) - : dist(dist), norm_query(norm_query), norm_data(norm_data) - { - } - - __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, - const int8_t* query_shared, - int loadIndex, - int shmemIndex) - { - constexpr int veclen_int = int8_veclen / 4; // converting int8_t veclens to int - -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - int32_t encV[veclen_int]; - raft::ldg( - encV, - reinterpret_cast(data) + (loadIndex + j * kIndexGroupSize) * veclen_int); - int32_t queryRegs[veclen_int]; - raft::lds(queryRegs, - reinterpret_cast(query_shared + shmemIndex) + j * veclen_int); -#pragma unroll - for (int k = 0; k < veclen_int; k++) { - compute_dist(dist, queryRegs[k], encV[k]); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); - norm_data = raft::dp4a(encV[k], encV[k], norm_data); - } - } - } - } - - __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, - const int8_t* query, - int baseLoadIndex, - const int lane_id) - { - constexpr int veclen_int = int8_veclen / 4; // converting int8_t veclens to int - - int32_t queryReg = - (lane_id < 8) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; - constexpr int stride = kUnroll * int8_veclen; - -#pragma unroll - for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - int32_t encV[veclen_int]; - raft::ldg( - encV, - reinterpret_cast(data) + (lane_id + j * kIndexGroupSize) * veclen_int); - const int d = (i * kUnroll + j) * veclen_int; -#pragma unroll - for (int k = 0; k < veclen_int; ++k) { - int32_t q = raft::shfl(queryReg, d + k, raft::WarpSize); - compute_dist(dist, q, encV[k]); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(q, q, norm_query); - norm_data = raft::dp4a(encV[k], encV[k], norm_data); - } - } - } - } - } - - __device__ __forceinline__ void runLoadShflAndComputeRemainder( - const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) - { - constexpr int veclen_int = int8_veclen / 4; - const int loadDim = dimBlocks + lane_id * 4; // Here 4 is for 1 - int; - int32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; - for (int d = 0; d < dim - dimBlocks; d += int8_veclen, data += kIndexGroupSize * int8_veclen) { - int32_t enc[veclen_int]; - raft::ldg(enc, reinterpret_cast(data) + lane_id * veclen_int); -#pragma unroll - for (int k = 0; k < veclen_int; k++) { - int32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); // Here 4 is for 1 - int; - compute_dist(dist, q, enc[k]); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(q, q, norm_query); - norm_data = raft::dp4a(enc[k], enc[k], norm_data); - } - } - } - } -}; - -template -struct loadAndComputeDist { - int32_t& dist; - int32_t& norm_query; - int32_t& norm_data; - __device__ __forceinline__ loadAndComputeDist(int32_t& dist, - int32_t& norm_query, - int32_t& norm_data) - : dist(dist), norm_query(norm_query), norm_data(norm_data) - { - } - __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, - const int8_t* query_shared, - int loadIndex, - int shmemIndex) - { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - int32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; - int32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; - compute_dist<2, int8_t, int32_t>(dist, queryRegs, encV); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); - norm_data = raft::dp4a(encV, encV, norm_data); - } - } - } - - __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, - const int8_t* query, - int baseLoadIndex, - const int lane_id) - { - int32_t queryReg = - (lane_id < 16) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; - constexpr int veclen = 2; - constexpr int stride = kUnroll * veclen; - -#pragma unroll - for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - int32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; - int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist<2, int8_t, int32_t>(dist, q, encV); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(queryReg, queryReg, norm_query); - norm_data = raft::dp4a(encV, encV, norm_data); - } - } - } - } - - __device__ __forceinline__ void runLoadShflAndComputeRemainder( - const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) - { - constexpr int veclen = 2; - int loadDim = dimBlocks + lane_id * veclen; - int32_t queryReg = loadDim < dim ? reinterpret_cast(query + loadDim)[0] : 0; - for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { - int32_t enc = reinterpret_cast(data + lane_id * veclen)[0]; - int32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); - compute_dist<2, int8_t, int32_t>(dist, q, enc); - if constexpr (ComputeNorm) { - norm_query = raft::dp4a(q, q, norm_query); - norm_data = raft::dp4a(enc, enc, norm_data); - } - } - } -}; - -template -struct loadAndComputeDist { - int32_t& dist; - int32_t& norm_query; - int32_t& norm_data; - __device__ __forceinline__ loadAndComputeDist(int32_t& dist, - int32_t& norm_query, - int32_t& norm_data) - : dist(dist), norm_query(norm_query), norm_data(norm_data) - { - } - - __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, - const int8_t* query_shared, - int loadIndex, - int shmemIndex) - { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - compute_dist<1, int8_t, int32_t>( - dist, query_shared[shmemIndex + j], data[loadIndex + j * kIndexGroupSize]); - if constexpr (ComputeNorm) { - 
norm_query += int32_t{query_shared[shmemIndex + j]} * int32_t{query_shared[shmemIndex + j]}; - norm_data += int32_t{data[loadIndex + j * kIndexGroupSize]} * - int32_t{data[loadIndex + j * kIndexGroupSize]}; - } - } - } - - __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, - const int8_t* query, - int baseLoadIndex, - const int lane_id) - { - constexpr int veclen = 1; - constexpr int stride = kUnroll * veclen; - int32_t queryReg = query[baseLoadIndex + lane_id]; - -#pragma unroll - for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); - compute_dist<1, int8_t, int32_t>(dist, q, data[lane_id + j * kIndexGroupSize]); - if constexpr (ComputeNorm) { - norm_query += q * q; - norm_data += data[lane_id + j * kIndexGroupSize] * data[lane_id + j * kIndexGroupSize]; - } - } - } - } - __device__ __forceinline__ void runLoadShflAndComputeRemainder( - const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) - { - constexpr int veclen = 1; - const int loadDim = dimBlocks + lane_id; - int32_t queryReg = loadDim < dim ? 
query[loadDim] : 0; - for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { - int32_t q = raft::shfl(queryReg, d, raft::WarpSize); - compute_dist<1, int8_t, int32_t>(dist, q, data[lane_id]); - if constexpr (ComputeNorm) { - norm_query += q * q; - norm_data += int32_t{data[lane_id]} * int32_t{data[lane_id]}; - } - } - } -}; - -// switch to dummy blocksort when Capacity is 0 this explicit dummy is chosen -// to support access to warpsort constants like ::queue_t::kDummy -template -struct flat_block_sort { - using type = raft::matrix::detail::select::warpsort::block_sort< - raft::matrix::detail::select::warpsort::warp_sort_filtered, - Capacity, - Ascending, - T, - IdxT>; -}; - -template -struct flat_block_sort<0, Ascending, T, IdxT> - : ivf::detail::dummy_block_sort_t { - using type = ivf::detail::dummy_block_sort_t; -}; - -template -using block_sort_t = typename flat_block_sort::type; - -/** - * Scan clusters for nearest neighbors of the query vectors. - * See `ivfflat_interleaved_scan` for more information. - * - * The clusters are stored in the interleaved index format described in ivf_flat_types.hpp. - * For each query vector, a set of clusters is probed: the distance to each vector in the cluster is - * calculated, and the top-k nearest neighbors are selected. - * - * @param compute_dist distance function - * @param query_smem_elems number of dimensions of the query vector to fit in a shared memory of a - * block; this number must be a multiple of `WarpSize * Veclen`. 
- * @param[in] query a pointer to all queries in a row-major contiguous format [gridDim.y, dim] - * @param[in] coarse_index a pointer to the cluster indices to search through [n_probes] - * @param[in] list_indices index.indices - * @param[in] list_data index.data - * @param[in] list_sizes index.list_sizes - * @param[in] list_offsets index.list_offsets - * @param n_probes - * @param k - * @param dim - * @param sample_filter - * @param[out] neighbors - * @param[out] distances - */ -template -RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) - interleaved_scan_kernel(PostLambda post_process, - const uint32_t query_smem_elems, - const T* query, - const uint32_t* coarse_index, - const T* const* list_data_ptrs, - const uint32_t* list_sizes, - const uint32_t queries_offset, - const uint32_t n_probes, - const uint32_t k, - const uint32_t max_samples, - const uint32_t* chunk_indices, - const uint32_t dim, - IvfSampleFilterT sample_filter, - uint32_t* neighbors, - float* distances) -{ - extern __shared__ __align__(256) uint8_t interleaved_scan_kernel_smem[]; - constexpr bool kManageLocalTopK = Capacity > 0; - // Using shared memory for the (part of the) query; - // This allows to save on global memory bandwidth when reading index and query - // data at the same time. - // Its size is `query_smem_elems`. 
- T* query_shared = reinterpret_cast(interleaved_scan_kernel_smem); - // Make the query input and output point to this block's shared query - { - const int query_id = blockIdx.y; - query += query_id * dim; - if constexpr (kManageLocalTopK) { - neighbors += query_id * k * gridDim.x + blockIdx.x * k; - distances += query_id * k * gridDim.x + blockIdx.x * k; - } else { - distances += query_id * uint64_t(max_samples); - } - chunk_indices += (n_probes * query_id); - coarse_index += query_id * n_probes; - } - - // Copy a part of the query into shared memory for faster processing - copy_vectorized(query_shared, query, std::min(dim, query_smem_elems)); - __syncthreads(); - - using local_topk_t = block_sort_t; - local_topk_t queue(k); - { - using align_warp = raft::Pow2; - const int lane_id = align_warp::mod(threadIdx.x); - - // How many full warps needed to compute the distance (without remainder) - const uint32_t full_warps_along_dim = align_warp::roundDown(dim); - - const uint32_t shm_assisted_dim = - (dim > query_smem_elems) ? query_smem_elems : full_warps_along_dim; - - // Every CUDA block scans one cluster at a time. - for (int probe_id = blockIdx.x; probe_id < n_probes; probe_id += gridDim.x) { - const uint32_t list_id = coarse_index[probe_id]; // The id of cluster(list) - - // The number of vectors in each cluster(list); [nlist] - const uint32_t list_length = list_sizes[list_id]; - - // The number of interleaved groups to be processed - const uint32_t num_groups = - align_warp::div(list_length + align_warp::Mask); // ceildiv by power of 2 - - uint32_t sample_offset = 0; - if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; } - assert(list_length == chunk_indices[probe_id] - sample_offset); - assert(sample_offset + list_length <= max_samples); - - constexpr int kUnroll = raft::WarpSize / Veclen; - constexpr uint32_t kNumWarps = kThreadsPerBlock / raft::WarpSize; - // Every warp reads WarpSize vectors and computes the distances to them. 
- // Then, the distances and corresponding ids are distributed among the threads, - // and each thread adds one (id, dist) pair to the filtering queue. - for (uint32_t group_id = align_warp::div(threadIdx.x); group_id < num_groups; - group_id += kNumWarps) { - AccT dist = 0; - AccT norm_query = 0; - AccT norm_dataset = 0; - // This is where this warp begins reading data (start position of an interleaved group) - const T* data = list_data_ptrs[list_id] + (group_id * kIndexGroupSize) * dim; - - // This is the vector a given lane/thread handles - const uint32_t vec_id = group_id * raft::WarpSize + lane_id; - const bool valid = - vec_id < list_length && sample_filter(queries_offset + blockIdx.y, list_id, vec_id); - - if (valid) { - // Process first shm_assisted_dim dimensions (always using shared memory) - loadAndComputeDist lc( - dist, norm_query, norm_dataset); - for (int pos = 0; pos < shm_assisted_dim; - pos += raft::WarpSize, data += kIndexGroupSize * raft::WarpSize) { - lc.runLoadShmemCompute(data, query_shared, lane_id, pos); - } - - if (dim > query_smem_elems) { - // The default path - using shfl ops - for dimensions beyond query_smem_elems - loadAndComputeDist lc( - dist, norm_query, norm_dataset); - for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += raft::WarpSize) { - lc.runLoadShflAndCompute(data, query, pos, lane_id); - } - lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim); - } else { - // when shm_assisted_dim == full_warps_along_dim < dim - loadAndComputeDist<1, Veclen, T, AccT, ComputeNorm> lc(dist, norm_query, norm_dataset); - for (int pos = full_warps_along_dim; pos < dim; - pos += Veclen, data += kIndexGroupSize * Veclen) { - lc.runLoadShmemCompute(data, query_shared, lane_id, pos); - } - } - } - - // Enqueue one element per thread - float val = valid ? 
static_cast(dist) : local_topk_t::queue_t::kDummy; - - if constexpr (ComputeNorm) { - if (valid) - val = val / (raft::sqrt(static_cast(norm_query)) * - raft::sqrt(static_cast(norm_dataset))); - } - if constexpr (kManageLocalTopK) { - queue.add(val, sample_offset + vec_id); - } else { - if (vec_id < list_length) distances[sample_offset + vec_id] = val; - } - } - - // fill up unused slots for current query - if constexpr (!kManageLocalTopK) { - if (probe_id + 1 == n_probes) { - for (uint32_t i = threadIdx.x + sample_offset + list_length; i < max_samples; - i += blockDim.x) { - distances[i] = local_topk_t::queue_t::kDummy; - } - } - } - } - } - - // finalize and store selected neighbours - if constexpr (kManageLocalTopK) { - __syncthreads(); - queue.done(interleaved_scan_kernel_smem); - queue.store(distances, neighbors, post_process); - } -} - /** * Configure the gridDim.x to maximize GPU occupancy, but reduce the output size */ @@ -1080,7 +160,7 @@ template void launch_kernel(PostLambda post_process, @@ -1093,7 +173,10 @@ void launch_kernel(PostLambda post_process, const uint32_t k, const uint32_t max_samples, const uint32_t* chunk_indices, - IvfSampleFilterT sample_filter, + IdxT* const* const inds_ptrs, + cuda::std::optional bitset_ptr, + cuda::std::optional bitset_len, + cuda::std::optional original_nbits, uint32_t* neighbors, float* distances, uint32_t& grid_dim_x, @@ -1116,12 +199,12 @@ void launch_kernel(PostLambda post_process, auto kernel_planner = InterleavedScanPlanner()), decltype(get_acc_type_tag()), decltype(get_idx_type_tag()), - decltype(get_filter_type_tag()), decltype(get_post_lambda_tag())>( Capacity, Veclen, Ascending, ComputeNorm); kernel_planner.template add_metric_device_function()), decltype(get_acc_type_tag())>( get_metric_name(), Veclen); + kernel_planner.add_filter_device_function(get_filter_name()); auto kernel_launcher = kernel_planner.get_launcher(); const int max_query_smem = 16384; @@ -1190,7 +273,11 @@ void launch_kernel(PostLambda 
post_process, max_samples, chunk_indices, index.dim(), - sample_filter, + // sample_filter, + inds_ptrs, + bitset_ptr.value_or(nullptr), + bitset_len.value_or(0), + original_nbits.value_or(0), neighbors, distances); queries += grid_dim_y * index.dim(); @@ -1212,7 +299,7 @@ template void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... args) { @@ -1226,7 +313,7 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... arg T, AccT, IdxT, - IvfSampleFilterT, + IvfSampleFilterTag, tag_metric_euclidean, raft::identity_op>({}, std::forward(args)...); case cuvs::distance::DistanceType::L2SqrtExpanded: @@ -1238,7 +325,7 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... arg T, AccT, IdxT, - IvfSampleFilterT, + IvfSampleFilterTag, tag_metric_euclidean, raft::sqrt_op>({}, std::forward(args)...); case cuvs::distance::DistanceType::InnerProduct: @@ -1249,7 +336,7 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... arg T, AccT, IdxT, - IvfSampleFilterT, + IvfSampleFilterTag, tag_metric_inner_product, raft::identity_op>({}, std::forward(args)...); case cuvs::distance::DistanceType::CosineExpanded: @@ -1261,7 +348,7 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... arg T, AccT, IdxT, - IvfSampleFilterT, + IvfSampleFilterTag, tag_metric_inner_product>( raft::compose_op(raft::add_const_op{1.0f}, raft::mul_const_op{-1.0f}), std::forward(args)...); // NB: update the description of `knn::ivf_flat::build` when @@ -1277,7 +364,7 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... 
arg template (1, 16 / sizeof(T))> struct select_interleaved_scan_kernel { @@ -1292,7 +379,7 @@ struct select_interleaved_scan_kernel { { if constexpr (Capacity > 0) { if (k_max == 0 || k_max > Capacity) { - return select_interleaved_scan_kernel::run( + return select_interleaved_scan_kernel::run( k_max, veclen, select_min, std::forward(args)...); } } @@ -1301,7 +388,7 @@ struct select_interleaved_scan_kernel { return select_interleaved_scan_kernel::run(k_max, veclen, @@ -1311,7 +398,7 @@ struct select_interleaved_scan_kernel { } if constexpr (Veclen > 1) { if (veclen % Veclen != 0) { - return select_interleaved_scan_kernel::run( + return select_interleaved_scan_kernel::run( k_max, 1, select_min, std::forward(args)...); } } @@ -1325,10 +412,10 @@ struct select_interleaved_scan_kernel { veclen == Veclen, "Veclen must be power-of-two not bigger than the maximum allowed size for this data type."); if (select_min) { - launch_with_fixed_consts( + launch_with_fixed_consts( std::forward(args)...); } else { - launch_with_fixed_consts( + launch_with_fixed_consts( std::forward(args)...); } } @@ -1385,26 +472,41 @@ void ivfflat_interleaved_scan(const index& index, { const int capacity = raft::bound_by_power_of_two(k); - auto filter_adapter = cuvs::neighbors::filtering::ivf_to_sample_filter( - index.inds_ptrs().data_handle(), sample_filter); - select_interleaved_scan_kernel::run(capacity, - index.veclen(), - select_min, - metric, - index, - queries, - coarse_query_results, - n_queries, - queries_offset, - n_probes, - k, - max_samples, - chunk_indices, - filter_adapter, - neighbors, - distances, - grid_dim_x, - stream); + // auto filter_adapter = cuvs::neighbors::filtering::ivf_to_sample_filter( + // index.inds_ptrs().data_handle(), sample_filter); + cuda::std::optional bitset_ptr; + cuda::std::optional bitset_len; + cuda::std::optional original_nbits; + + if constexpr (std::is_same_v>) { + bitset_ptr = sample_filter.view().data(); + bitset_len = sample_filter.view().size(); + 
original_nbits = sample_filter.view().get_original_nbits(); + } + select_interleaved_scan_kernel())>:: + run(capacity, + index.veclen(), + select_min, + metric, + index, + queries, + coarse_query_results, + n_queries, + queries_offset, + n_probes, + k, + max_samples, + chunk_indices, + // filter_adapter, + index.inds_ptrs().data_handle(), + bitset_ptr, + bitset_len, + original_nbits, + neighbors, + distances, + grid_dim_x, + stream); } } // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh new file mode 100644 index 0000000000..7086bafd15 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh @@ -0,0 +1,980 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../ivf_common.cuh" + +#include + +#include +#include +#include +#include + +// This header contains the kernel definition and should only be included +// when compiling JIT-LTO kernel fragments (when BUILD_KERNEL is defined). + +namespace cuvs::neighbors::ivf_flat::detail { + +static constexpr int kThreadsPerBlock = 128; + +// These extern device functions are linked at runtime using JIT-LTO. 
+template +extern __device__ void compute_dist(AccT& acc, AccT x, AccT y); + +template +extern __device__ bool sample_filter(index_t* const* const inds_ptrs, + const uint32_t query_ix, + const uint32_t cluster_ix, + const uint32_t sample_ix, + uint32_t* bitset_ptr, + index_t bitset_len, + index_t original_nbits); + +/** + * @brief Copy `n` elements per block from one place to another. + * + * @param[out] out target pointer (unique per block) + * @param[in] in source pointer + * @param n number of elements to copy + */ +template +__device__ inline void copy_vectorized(T* out, const T* in, uint32_t n) +{ + constexpr int VecElems = VecBytes / sizeof(T); // NOLINT + using align_bytes = raft::Pow2<(size_t)VecBytes>; + if constexpr (VecElems > 1) { + using align_elems = raft::Pow2; + if (!align_bytes::areSameAlignOffsets(out, in)) { + return copy_vectorized<(VecBytes >> 1), T>(out, in, n); + } + { // process unaligned head + uint32_t head = align_bytes::roundUp(in) - in; + if (head > 0) { + copy_vectorized(out, in, head); + n -= head; + in += head; + out += head; + } + } + { // process main part vectorized + using vec_t = typename raft::IOType::Type; + copy_vectorized( + reinterpret_cast(out), reinterpret_cast(in), align_elems::div(n)); + } + { // process unaligned tail + uint32_t tail = align_elems::mod(n); + if (tail > 0) { + n -= tail; + copy_vectorized(out + n, in + n, tail); + } + } + } + if constexpr (VecElems <= 1) { + for (int i = threadIdx.x; i < n; i += blockDim.x) { + out[i] = in[i]; + } + } +} + +/** + * @brief Load a part of a vector from the index and from query, compute the (part of the) distance + * between them, and aggregate it using the provided Lambda; one structure per thread, per query, + * and per index item. 
+ * + * @tparam kUnroll elements per loop (normally, kUnroll = WarpSize / Veclen) + * @tparam Lambda computing the part of the distance for one dimension and aggregating it: + * void (AccT& acc, AccT x, AccT y) + * @tparam Veclen size of the vectorized load + * @tparam T type of the data in the query and the index + * @tparam AccT type of the accumulated value (an optimization for 8bit values to be loaded as 32bit + * values) + */ +template +struct loadAndComputeDist { + AccT& dist; + AccT& norm_query; + AccT& norm_data; + + __device__ __forceinline__ loadAndComputeDist(AccT& dist, AccT& norm_query, AccT& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) + { + } + + /** + * Load parts of vectors from the index and query and accumulates the partial distance. + * This version assumes the query is stored in shared memory. + * Every thread here processes exactly kUnroll * Veclen elements independently of others. + */ + template + __device__ __forceinline__ void runLoadShmemCompute(const T* const& data, + const T* query_shared, + IdxT loadIndex, + IdxT shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + T encV[Veclen]; + raft::ldg(encV, data + (loadIndex + j * kIndexGroupSize) * Veclen); + T queryRegs[Veclen]; + raft::lds(queryRegs, &query_shared[shmemIndex + j * Veclen]); +#pragma unroll + for (int k = 0; k < Veclen; ++k) { + compute_dist(dist, queryRegs[k], encV[k]); + if constexpr (ComputeNorm) { + norm_query += queryRegs[k] * queryRegs[k]; + norm_data += encV[k] * encV[k]; + } + } + } + } + + /** + * Load parts of vectors from the index and query and accumulates the partial distance. + * This version assumes the query is stored in the global memory and is different for every + * thread. One warp loads exactly WarpSize query elements at once and then reshuffles them into + * corresponding threads (`WarpSize / (kUnroll * Veclen)` elements per thread at once). 
+ */ + template + __device__ __forceinline__ void runLoadShflAndCompute(const T*& data, + const T* query, + IdxT baseLoadIndex, + const int lane_id) + { + T queryReg = query[baseLoadIndex + lane_id]; + constexpr int stride = kUnroll * Veclen; + constexpr int totalIter = raft::WarpSize / stride; + constexpr int gmemStride = stride * kIndexGroupSize; +#pragma unroll + for (int i = 0; i < totalIter; ++i, data += gmemStride) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + T encV[Veclen]; + raft::ldg(encV, data + (lane_id + j * kIndexGroupSize) * Veclen); + const int d = (i * kUnroll + j) * Veclen; +#pragma unroll + for (int k = 0; k < Veclen; ++k) { + T q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, encV[k]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += encV[k] * encV[k]; + } + } + } + } + } + + /** + * Load parts of vectors from the index and query and accumulates the partial distance. + * This version augments `runLoadShflAndCompute` when `dim` is not a multiple of `WarpSize`. + */ + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const T*& data, const T* query, const int lane_id, const int dim, const int dimBlocks) + { + const int loadDim = dimBlocks + lane_id; + T queryReg = loadDim < dim ? 
query[loadDim] : T{0}; + const int loadDataIdx = lane_id * Veclen; + for (int d = 0; d < dim - dimBlocks; d += Veclen, data += kIndexGroupSize * Veclen) { + T enc[Veclen]; + raft::ldg(enc, data + loadDataIdx); +#pragma unroll + for (int k = 0; k < Veclen; k++) { + T q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, enc[k]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += enc[k] * enc[k]; + } + } + } + } +}; + +// This handles uint8_t 8, 16 Veclens +template +struct loadAndComputeDist { + uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; + + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, + uint32_t& norm_query, + uint32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { + constexpr int veclen_int = uint8_veclen / 4; // converting uint8_t veclens to int + loadIndex = loadIndex * veclen_int; +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + loadIndex + j * kIndexGroupSize * veclen_int); + uint32_t queryRegs[veclen_int]; + raft::lds(queryRegs, + reinterpret_cast(query_shared + shmemIndex) + j * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + compute_dist(dist, queryRegs[k], encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } + } + } + } + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + constexpr int veclen_int = uint8_veclen / 4; // converting uint8_t veclens to int + uint32_t queryReg = + (lane_id < 8) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int stride = kUnroll * uint8_veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + (lane_id + j * kIndexGroupSize) * veclen_int); + const int d = (i * kUnroll + j) * veclen_int; +#pragma unroll + for (int k = 0; k < veclen_int; ++k) { + uint32_t q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen_int = uint8_veclen / 4; + const int loadDim = dimBlocks + lane_id * 4; // Here 4 is for 1 - int + uint32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; + d += uint8_veclen, data += kIndexGroupSize * uint8_veclen) { + uint32_t enc[veclen_int]; + raft::ldg(enc, reinterpret_cast(data) + lane_id * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + uint32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); + compute_dist(dist, q, enc[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc[k], enc[k], norm_data); + } + } + } + } +}; + +// Keep this specialized uint8 Veclen = 4, because compiler is generating suboptimal code while +// using above common template of int2/int4 +template +struct loadAndComputeDist { + uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; + + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, + uint32_t& norm_query, + uint32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; + uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; + compute_dist<4, uint8_t, uint32_t>(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + uint32_t queryReg = + (lane_id < 8) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int veclen = 4; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; + uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist<4, uint8_t, uint32_t>(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen = 4; + const int loadDim = dimBlocks + lane_id * veclen; // byte offset of this lane's packed 4-element query word + uint32_t queryReg = loadDim < dim ? reinterpret_cast(query + loadDim)[0] : 0; // lane l holds dims [dimBlocks + 4l, dimBlocks + 4l + 3], matching the shfl source lane d / veclen below + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + uint32_t enc = reinterpret_cast(data)[lane_id]; + uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); + compute_dist<4, uint8_t, uint32_t>(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc, enc, norm_data); + } + } + } +}; + +template +struct loadAndComputeDist { + uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; + + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, + uint32_t& norm_query, + uint32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; + uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; + 
compute_dist<2, uint8_t, uint32_t>(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + uint32_t queryReg = + (lane_id < 16) ? reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int veclen = 2; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; + uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist<2, uint8_t, uint32_t>(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen = 2; + int loadDim = dimBlocks + lane_id * veclen; + uint32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + uint32_t enc = reinterpret_cast(data)[lane_id]; + uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); + compute_dist<2, uint8_t, uint32_t>(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc, enc, norm_data); + } + } + } +}; + +template +struct loadAndComputeDist { + uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; + + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, + uint32_t& norm_query, + uint32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = data[loadIndex + j * kIndexGroupSize]; + uint32_t queryRegs = query_shared[shmemIndex + j]; + compute_dist<1, uint8_t, uint32_t>(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query += queryRegs * queryRegs; + norm_data += encV * encV; + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + uint32_t queryReg = query[baseLoadIndex + lane_id]; + constexpr int veclen = 1; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = data[lane_id + j * kIndexGroupSize]; + uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist<1, uint8_t, uint32_t>(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += encV * encV; + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const 
uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen = 1; + int loadDim = dimBlocks + lane_id; + uint32_t queryReg = loadDim < dim ? query[loadDim] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + uint32_t enc = data[lane_id]; + uint32_t q = raft::shfl(queryReg, d, raft::WarpSize); + compute_dist<1, uint8_t, uint32_t>(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += enc * enc; + } + } + } +}; + +// This device function is for int8 veclens 4, 8 and 16 +template +struct loadAndComputeDist { + int32_t& dist; + int32_t& norm_query; + int32_t& norm_data; + + __device__ __forceinline__ loadAndComputeDist(int32_t& dist, + int32_t& norm_query, + int32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, + const int8_t* query_shared, + int loadIndex, + int shmemIndex) + { + constexpr int veclen_int = int8_veclen / 4; // converting int8_t veclens to int + +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + (loadIndex + j * kIndexGroupSize) * veclen_int); + int32_t queryRegs[veclen_int]; + raft::lds(queryRegs, + reinterpret_cast(query_shared + shmemIndex) + j * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + compute_dist(dist, queryRegs[k], encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, + const int8_t* query, + int baseLoadIndex, + const int lane_id) + { + constexpr int veclen_int = int8_veclen / 4; // converting int8_t veclens to int + + int32_t queryReg = + (lane_id < 8) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int stride = kUnroll * int8_veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + (lane_id + j * kIndexGroupSize) * veclen_int); + const int d = (i * kUnroll + j) * veclen_int; +#pragma unroll + for (int k = 0; k < veclen_int; ++k) { + int32_t q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) + { + constexpr int veclen_int = int8_veclen / 4; + const int loadDim = dimBlocks + lane_id * 4; // Here 4 is for 1 - int; + int32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; d += int8_veclen, data += kIndexGroupSize * int8_veclen) { + int32_t enc[veclen_int]; + raft::ldg(enc, reinterpret_cast(data) + lane_id * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + int32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); // Here 4 is for 1 - int; + compute_dist(dist, q, enc[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc[k], enc[k], norm_data); + } + } + } + } +}; + +template +struct loadAndComputeDist { + int32_t& dist; + int32_t& norm_query; + int32_t& norm_data; + __device__ __forceinline__ loadAndComputeDist(int32_t& dist, + int32_t& norm_query, + int32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) + { + } + __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, + const int8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; + int32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; + compute_dist<2, int8_t, int32_t>(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, + const int8_t* query, + int baseLoadIndex, + const int lane_id) + { + int32_t queryReg = + (lane_id < 16) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int veclen = 2; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; + int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist<2, int8_t, int32_t>(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); // accumulate the shuffled query word q (the one paired with encV), not this lane's own queryReg + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) + { + constexpr int veclen = 2; + int loadDim = dimBlocks + lane_id * veclen; + int32_t queryReg = loadDim < dim ? reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + int32_t enc = reinterpret_cast(data + lane_id * veclen)[0]; + int32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); + compute_dist<2, int8_t, int32_t>(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc, enc, norm_data); + } + } + } +}; + +template +struct loadAndComputeDist { + int32_t& dist; + int32_t& norm_query; + int32_t& norm_data; + __device__ __forceinline__ loadAndComputeDist(int32_t& dist, + int32_t& norm_query, + int32_t& norm_data) + : dist(dist), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, + const int8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + compute_dist<1, int8_t, int32_t>( + dist, query_shared[shmemIndex + j], data[loadIndex + j * kIndexGroupSize]); + if constexpr (ComputeNorm) { + 
norm_query += int32_t{query_shared[shmemIndex + j]} * int32_t{query_shared[shmemIndex + j]}; + norm_data += int32_t{data[loadIndex + j * kIndexGroupSize]} * + int32_t{data[loadIndex + j * kIndexGroupSize]}; + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, + const int8_t* query, + int baseLoadIndex, + const int lane_id) + { + constexpr int veclen = 1; + constexpr int stride = kUnroll * veclen; + int32_t queryReg = query[baseLoadIndex + lane_id]; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist<1, int8_t, int32_t>(dist, q, data[lane_id + j * kIndexGroupSize]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += data[lane_id + j * kIndexGroupSize] * data[lane_id + j * kIndexGroupSize]; + } + } + } + } + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) + { + constexpr int veclen = 1; + const int loadDim = dimBlocks + lane_id; + int32_t queryReg = loadDim < dim ? 
query[loadDim] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + int32_t q = raft::shfl(queryReg, d, raft::WarpSize); + compute_dist<1, int8_t, int32_t>(dist, q, data[lane_id]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += int32_t{data[lane_id]} * int32_t{data[lane_id]}; + } + } + } +}; + +// switch to dummy blocksort when Capacity is 0 this explicit dummy is chosen +// to support access to warpsort constants like ::queue_t::kDummy +template +struct flat_block_sort { + using type = raft::matrix::detail::select::warpsort::block_sort< + raft::matrix::detail::select::warpsort::warp_sort_filtered, + Capacity, + Ascending, + T, + IdxT>; +}; + +template +struct flat_block_sort<0, Ascending, T, IdxT> + : ivf::detail::dummy_block_sort_t { + using type = ivf::detail::dummy_block_sort_t; +}; + +template +using block_sort_t = typename flat_block_sort::type; + +/** + * Scan clusters for nearest neighbors of the query vectors. + * See `ivfflat_interleaved_scan` for more information. + * + * The clusters are stored in the interleaved index format described in ivf_flat_types.hpp. + * For each query vector, a set of clusters is probed: the distance to each vector in the cluster is + * calculated, and the top-k nearest neighbors are selected. + * + * @param compute_dist distance function + * @param query_smem_elems number of dimensions of the query vector to fit in a shared memory of a + * block; this number must be a multiple of `WarpSize * Veclen`. 
+ * @param[in] query a pointer to all queries in a row-major contiguous format [gridDim.y, dim] + * @param[in] coarse_index a pointer to the cluster indices to search through [n_probes] + * @param[in] list_indices index.indices + * @param[in] list_data index.data + * @param[in] list_sizes index.list_sizes + * @param[in] list_offsets index.list_offsets + * @param n_probes + * @param k + * @param dim + * @param sample_filter + * @param[out] neighbors + * @param[out] distances + */ +template +RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) + interleaved_scan_kernel(PostLambda post_process, + const uint32_t query_smem_elems, + const T* query, + const uint32_t* coarse_index, + const T* const* list_data_ptrs, + const uint32_t* list_sizes, + const uint32_t queries_offset, + const uint32_t n_probes, + const uint32_t k, + const uint32_t max_samples, + const uint32_t* chunk_indices, + const uint32_t dim, + IdxT* const* const inds_ptrs, + uint32_t* bitset_ptr, + IdxT bitset_len, + IdxT original_nbits, + uint32_t* neighbors, + float* distances) +{ + extern __shared__ __align__(256) uint8_t interleaved_scan_kernel_smem[]; + constexpr bool kManageLocalTopK = Capacity > 0; + // Using shared memory for the (part of the) query; + // This allows to save on global memory bandwidth when reading index and query + // data at the same time. + // Its size is `query_smem_elems`. 
+ T* query_shared = reinterpret_cast(interleaved_scan_kernel_smem); + // Make the query input and output point to this block's shared query + { + const int query_id = blockIdx.y; + query += query_id * dim; + if constexpr (kManageLocalTopK) { + neighbors += query_id * k * gridDim.x + blockIdx.x * k; + distances += query_id * k * gridDim.x + blockIdx.x * k; + } else { + distances += query_id * uint64_t(max_samples); + } + chunk_indices += (n_probes * query_id); + coarse_index += query_id * n_probes; + } + + // Copy a part of the query into shared memory for faster processing + copy_vectorized(query_shared, query, std::min(dim, query_smem_elems)); + __syncthreads(); + + using local_topk_t = block_sort_t; + local_topk_t queue(k); + { + using align_warp = raft::Pow2; + const int lane_id = align_warp::mod(threadIdx.x); + + // How many full warps needed to compute the distance (without remainder) + const uint32_t full_warps_along_dim = align_warp::roundDown(dim); + + const uint32_t shm_assisted_dim = + (dim > query_smem_elems) ? query_smem_elems : full_warps_along_dim; + + // Every CUDA block scans one cluster at a time. + for (int probe_id = blockIdx.x; probe_id < n_probes; probe_id += gridDim.x) { + const uint32_t list_id = coarse_index[probe_id]; // The id of cluster(list) + + // The number of vectors in each cluster(list); [nlist] + const uint32_t list_length = list_sizes[list_id]; + + // The number of interleaved groups to be processed + const uint32_t num_groups = + align_warp::div(list_length + align_warp::Mask); // ceildiv by power of 2 + + uint32_t sample_offset = 0; + if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; } + assert(list_length == chunk_indices[probe_id] - sample_offset); + assert(sample_offset + list_length <= max_samples); + + constexpr int kUnroll = raft::WarpSize / Veclen; + constexpr uint32_t kNumWarps = kThreadsPerBlock / raft::WarpSize; + // Every warp reads WarpSize vectors and computes the distances to them. 
+ // Then, the distances and corresponding ids are distributed among the threads, + // and each thread adds one (id, dist) pair to the filtering queue. + for (uint32_t group_id = align_warp::div(threadIdx.x); group_id < num_groups; + group_id += kNumWarps) { + AccT dist = 0; + AccT norm_query = 0; + AccT norm_dataset = 0; + // This is where this warp begins reading data (start position of an interleaved group) + const T* data = list_data_ptrs[list_id] + (group_id * kIndexGroupSize) * dim; + + // This is the vector a given lane/thread handles + const uint32_t vec_id = group_id * raft::WarpSize + lane_id; + const bool valid = vec_id < list_length && sample_filter(inds_ptrs, + queries_offset + blockIdx.y, + list_id, + vec_id, + bitset_ptr, + bitset_len, + original_nbits); + + if (valid) { + // Process first shm_assisted_dim dimensions (always using shared memory) + loadAndComputeDist lc( + dist, norm_query, norm_dataset); + for (int pos = 0; pos < shm_assisted_dim; + pos += raft::WarpSize, data += kIndexGroupSize * raft::WarpSize) { + lc.runLoadShmemCompute(data, query_shared, lane_id, pos); + } + + if (dim > query_smem_elems) { + // The default path - using shfl ops - for dimensions beyond query_smem_elems + loadAndComputeDist lc( + dist, norm_query, norm_dataset); + for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += raft::WarpSize) { + lc.runLoadShflAndCompute(data, query, pos, lane_id); + } + lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim); + } else { + // when shm_assisted_dim == full_warps_along_dim < dim + loadAndComputeDist<1, Veclen, T, AccT, ComputeNorm> lc(dist, norm_query, norm_dataset); + for (int pos = full_warps_along_dim; pos < dim; + pos += Veclen, data += kIndexGroupSize * Veclen) { + lc.runLoadShmemCompute(data, query_shared, lane_id, pos); + } + } + } + + // Enqueue one element per thread + float val = valid ? 
static_cast(dist) : local_topk_t::queue_t::kDummy; + + if constexpr (ComputeNorm) { + if (valid) + val = val / (raft::sqrt(static_cast(norm_query)) * + raft::sqrt(static_cast(norm_dataset))); + } + if constexpr (kManageLocalTopK) { + queue.add(val, sample_offset + vec_id); + } else { + if (vec_id < list_length) distances[sample_offset + vec_id] = val; + } + } + + // fill up unused slots for current query + if constexpr (!kManageLocalTopK) { + if (probe_id + 1 == n_probes) { + for (uint32_t i = threadIdx.x + sample_offset + list_length; i < max_samples; + i += blockDim.x) { + distances[i] = local_topk_t::queue_t::kDummy; + } + } + } + } + } + + // finalize and store selected neighbours + if constexpr (kManageLocalTopK) { + __syncthreads(); + queue.done(interleaved_scan_kernel_smem); + queue.store(distances, neighbors, post_process); + } +} + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh new file mode 100644 index 0000000000..95ea61e194 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "../../sample_filter.cuh" + +namespace cuvs::neighbors::ivf_flat::detail { + +template +__device__ bool sample_filter(index_t* const* const inds_ptrs, + const uint32_t query_ix, + const uint32_t cluster_ix, + const uint32_t sample_ix, + uint32_t* bitset_ptr, + index_t bitset_len, + index_t original_nbits) +{ + auto bitset_view = + raft::core::bitset_view{bitset_ptr, bitset_len, original_nbits}; + auto bitset_filter = cuvs::neighbors::filtering::bitset_filter{bitset_view}; + auto ivf_to_sample_filter = cuvs::neighbors::filtering:: + ivf_to_sample_filter>{ + inds_ptrs, bitset_filter}; + return ivf_to_sample_filter(query_ix, cluster_ix, sample_ix); +} + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh new file mode 100644 index 0000000000..61dfc9de84 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "../../sample_filter.cuh" + +namespace cuvs::neighbors::ivf_flat::detail { + +template +__device__ constexpr bool sample_filter(index_t* const* const inds_ptrs, + const uint32_t query_ix, + const uint32_t cluster_ix, + const uint32_t sample_ix, + uint32_t* bitset_ptr, + index_t bitset_len, + index_t original_nbits) +{ + return true; +} + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py index cfb4e8b12d..da9a033152 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py @@ -95,7 +95,7 @@ def get_final_op_abbreviation(op_str): def generate_filename(params): - """Generate filename from template parameters (WITHOUT metric).""" + """Generate filename from template parameters (WITHOUT metric and filter).""" # params[0]: Capacity (numeric) # params[1]: Veclen (numeric) # params[2]: Ascending (bool) @@ -103,7 +103,7 @@ def generate_filename(params): # params[4]: T (type) # params[5]: AccT (type) # params[6]: IdxT (type) - # params[7]: FilterT (filter type) + # params[7]: FilterT (filter type - EXCLUDED from filename) # params[8]: Lambda/MetricTag (metric type - EXCLUDED from filename) # params[9]: PostLambda (final operator) @@ -115,7 +115,7 @@ def generate_filename(params): get_type_abbreviation(params[4]), # T get_type_abbreviation(params[5]), # AccT get_type_abbreviation(params[6]), # IdxT - get_filter_abbreviation(params[7]), # FilterT + # params[7] EXCLUDED - filter # params[8] EXCLUDED - metric get_final_op_abbreviation(params[9]) # PostLambda ] @@ -124,7 +124,7 @@ def generate_filename(params): def generate_register_function_name(params): - """Generate the registration function name from template parameters (WITHOUT metric).""" + """Generate the registration function name from template parameters (WITHOUT 
metric and filter).""" parts = [ params[0], # Capacity params[1], # Veclen @@ -133,7 +133,7 @@ def generate_register_function_name(params): get_type_abbreviation(params[4]), # T get_type_abbreviation(params[5]), # AccT get_type_abbreviation(params[6]), # IdxT - get_filter_abbreviation(params[7]), # FilterT + # params[7] EXCLUDED - filter # params[8] EXCLUDED - metric get_final_op_abbreviation(params[9]) # PostLambda ] @@ -228,26 +228,27 @@ def generate_cuda_file_content(params): filename = generate_register_function_name(params) embedded_var_name = f"embedded_{filename}" - # The kernel now has 9 template parameters (removed MetricTag) + # The kernel now has 8 template parameters (removed MetricTag and FilterT) # params[0-3]: Capacity, Veclen, Ascending, ComputeNorm # params[4]: T (data type) # params[5]: AccT (accumulator type) # params[6]: IdxT (index type) - # params[7]: IvfSampleFilterT (filter type) + # params[7]: IvfSampleFilterT (filter type - NOT used in template anymore) # params[8]: Lambda (metric - NOT used in template anymore) # params[9]: PostLambda (post-processing operator) - # Template parameters without MetricTag (params 0-7, 9) - template_params_list = params[0:8] + [params[9]] + # Template parameters without MetricTag and FilterT (params 0-6, 9) + template_params_list = params[0:7] + [params[9]] template_params = ', '.join(template_params_list) - # Convert params 4-7 and 9 to tag types for registerAlgorithm (NO metric tag) - tag_params = [param_to_tag(i, params[i], params) for i in [4, 5, 6, 7, 9]] + # Convert params 4-6 and 9 to tag types for registerAlgorithm (NO metric/filter tags) + tag_params = [param_to_tag(i, params[i], params) for i in [4, 5, 6, 9]] register_template_params = ', '.join(tag_params) # Create the string parameter with first four params (Capacity, Veclen, Ascending, ComputeNorm) string_param = f"interleaved_scan_kernel_{params[0]}_{params[1]}_{params[2]}_{params[3]}" + # Function parameters for the kernel instantiation 
(updated signature) content = f"""/* * Copyright (c) 2025, NVIDIA CORPORATION. * @@ -266,11 +267,11 @@ def generate_cuda_file_content(params): #ifdef BUILD_KERNEL -#include "../../ivf_flat_interleaved_scan.cuh" +#include "../../ivf_flat_interleaved_scan_kernel.cuh" namespace cuvs::neighbors::ivf_flat::detail {{ -template __global__ void interleaved_scan_kernel<{template_params}>({params[9]}, unsigned int, {params[4]} const*, unsigned int const*, {params[4]} const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, {params[7]}, unsigned int*, float*); +template __global__ void interleaved_scan_kernel<{template_params}>({params[9]}, unsigned int, {params[4]} const*, unsigned int const*, {params[4]} const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, {params[6]}* const* const, unsigned int*, {params[6]}, {params[6]}, unsigned int*, float*); }} // namespace cuvs::neighbors::ivf_flat::detail @@ -382,6 +383,61 @@ def generate_metric_device_function_content(metric_name, veclen, data_type, acc_ return content +def generate_filter_device_function_content(filter_name): + """Generate content for a filter device function file.""" + # Determine which header to include based on filter name + if filter_name == 'filter_none': + header_file = '../filter_none.cuh' + elif filter_name == 'filter_bitset': + header_file = '../filter_bitset.cuh' + else: + raise ValueError(f"Unknown filter: {filter_name}") + + content = f"""/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef BUILD_KERNEL + +#include "{header_file}" + +namespace cuvs::neighbors::ivf_flat::detail {{ + +template __device__ bool sample_filter(int64_t* const* const inds_ptrs, const uint32_t query_ix, const uint32_t cluster_ix, const uint32_t sample_ix, uint32_t* bitset_ptr, int64_t bitset_len, int64_t original_nbits); + +}} // namespace cuvs::neighbors::ivf_flat::detail + +#else + +#include "{filter_name}.h" +#include +#include "../interleaved_scan_tags.hpp" + +__attribute__((__constructor__)) static void register_{filter_name}() +{{ +using namespace cuvs::neighbors::ivf_flat::detail; +registerAlgorithm("{filter_name}", + embedded_{filter_name}, + sizeof(embedded_{filter_name})); +}} + +#endif +""" + return content + + def generate_metric_device_functions(script_dir): """Generate all metric device function files.""" # Define all combinations we need @@ -439,6 +495,37 @@ def generate_metric_device_functions(script_dir): return generated_files +def generate_filter_device_functions(script_dir): + """Generate all filter device function files.""" + filters = ['filter_none', 'filter_bitset'] + + output_dir = script_dir / 'filter_device_functions' + output_dir.mkdir(parents=True, exist_ok=True) + + generated_files = [] + + for filter_name in filters: + filename = f"{filter_name}.cu" + file_content = generate_filter_device_function_content(filter_name) + + # Write file only if it doesn't exist or content has changed + output_file = output_dir / filename + should_write = True + if output_file.exists(): + with open(output_file, 'r') as f: + 
existing_content = f.read() + should_write = (existing_content != file_content) + + if should_write: + with open(output_file, 'w') as f: + f.write(file_content) + + generated_files.append(filename) + + print(f"Generated {len(generated_files)} filter device function files") + return generated_files + + def main(): # Get the script directory to find the kernels file script_dir = Path(__file__).parent.absolute() @@ -520,6 +607,9 @@ def main(): # Generate metric device function files metric_files = generate_metric_device_functions(script_dir) + # Generate filter device function files + filter_files = generate_filter_device_functions(script_dir) + # Generate CMake file with all filenames # We're generating in the source tree at: cpp/src/neighbors/ivf_flat/jit_lto_kernels/ # CMake file goes to: cpp/cmake/jit_lto_kernels_list/ @@ -539,6 +629,12 @@ def main(): cmake_content += "set(METRIC_DEVICE_FUNCTION_FILES\n" for filename in sorted(metric_files): cmake_content += f" src/neighbors/ivf_flat/jit_lto_kernels/metric_device_functions/{filename}\n" + cmake_content += ")\n\n" + + # Add filter device function files + cmake_content += "set(FILTER_DEVICE_FUNCTION_FILES\n" + for filename in sorted(filter_files): + cmake_content += f" src/neighbors/ivf_flat/jit_lto_kernels/filter_device_functions/{filename}\n" cmake_content += ")\n" # Only write if content has changed diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp index 8d2d4b9f56..dbde1bf5f7 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -42,11 +42,14 @@ struct InterleavedScanPlanner : AlgorithmPlanner { auto key = metric_name + "_" + std::to_string(Veclen); auto params = make_fragment_key(); std::cout << "Looking for metric fragment: " << key + "_" + params << std::endl; - auto metric_fragment = 
db.cache.find(key + "_" + params); - if (metric_fragment == db.cache.end()) { - std::cout << "Metric fragment not found" << std::endl; - return; - } - fragments.push_back(metric_fragment->second.get()); + this->device_functions.push_back(key + "_" + params); + } + + void add_filter_device_function(std::string filter_name) + { + auto& db = fragment_database(); + auto key = filter_name + "_"; + std::cout << "Looking for filter fragment: " << key << std::endl; + this->device_functions.push_back(key); } }; diff --git a/cpp/src/neighbors/sample_filter.cuh b/cpp/src/neighbors/sample_filter.cuh index 4df4794206..d016cb9ab8 100644 --- a/cpp/src/neighbors/sample_filter.cuh +++ b/cpp/src/neighbors/sample_filter.cuh @@ -66,8 +66,8 @@ struct takes_three_args< * @tparam filter_t */ template -ivf_to_sample_filter::ivf_to_sample_filter(const index_t* const* inds_ptrs, - const filter_t next_filter) +_RAFT_HOST_DEVICE ivf_to_sample_filter::ivf_to_sample_filter( + const index_t* const* inds_ptrs, const filter_t next_filter) : inds_ptrs_{inds_ptrs}, next_filter_{next_filter} { } From 84c6020bce824576614bcd5f6c4ef157e33ebac5 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Oct 2025 03:13:48 +0000 Subject: [PATCH 009/158] lto post lambda, cleanup files, generate cmake in build dir --- .gitignore | 6 - cpp/CMakeLists.txt | 4 +- cpp/cmake/modules/generate_header.cmake | 2 +- .../generate_interleaved_scan_kernels.cmake | 29 +-- .../cuvs/detail/jit_lto/FragmentEntry.h | 30 ---- .../cuvs/detail/jit_lto/MakeFragmentKey.h | 1 - cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 170 +----------------- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 7 - cpp/src/detail/jit_lto/FragmentDatabase.cu | 10 +- cpp/src/detail/jit_lto/FragmentEntry.cu | 28 --- .../ivf_flat/ivf_flat_interleaved_scan.cuh | 40 ++--- .../ivf_flat_interleaved_scan_kernel.cuh | 11 +- .../jit_lto_kernels/generate_kernels.py | 170 ++++++++++++++---- .../interleaved_scan_planner.hpp | 11 +- 
.../ivf_flat/jit_lto_kernels/post_compose.cuh | 31 ++++ .../jit_lto_kernels/post_identity.cuh} | 21 +-- .../ivf_flat/jit_lto_kernels/post_sqrt.cuh | 29 +++ dependencies.yaml | 8 +- python/libcuvs/pyproject.toml | 4 +- 19 files changed, 269 insertions(+), 343 deletions(-) create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh rename cpp/src/{detail/jit_lto/MakeFragmentKey.cu => neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh} (67%) create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh diff --git a/.gitignore b/.gitignore index bea7afa8e0..3627558ff5 100644 --- a/.gitignore +++ b/.gitignore @@ -88,9 +88,3 @@ ivf_pq_index # java .classpath - -# jit lto kernels -cpp/cmake/jit_lto_kernels_list/ -cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels/interleaved_scan_kernel_*.cu -cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_device_functions/*.cu -cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_device_functions/*.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index dbeb2f3a50..d6554fc162 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -338,7 +338,6 @@ if(NOT BUILD_CPU_ONLY) src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/FragmentDatabase.cu src/detail/jit_lto/FragmentEntry.cu - src/detail/jit_lto/MakeFragmentKey.cu ) add_library( @@ -575,6 +574,7 @@ if(NOT BUILD_CPU_ONLY) ${INTERLEAVED_SCAN_KERNEL_FILES} ${METRIC_DEVICE_FUNCTION_FILES} ${FILTER_DEVICE_FUNCTION_FILES} + ${POST_LAMBDA_DEVICE_FUNCTION_FILES} ) # Make sure the kernels are generated before we try to build them @@ -583,6 +583,7 @@ if(NOT BUILD_CPU_ONLY) target_compile_definitions(jit_lto_fatbins PRIVATE BUILD_KERNEL) target_include_directories(jit_lto_fatbins PRIVATE "$" + "$" ) target_compile_options(jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size) target_compile_options( @@ -616,6 +617,7 @@ if(NOT BUILD_CPU_ONLY) target_include_directories(jit_lto_fatbins_as_cpp_sources PRIVATE "$" + "$" 
) # target_compile_options( # jit_lto_fatbins_as_cpp_sources PRIVATE "$<$:${CUVS_CXX_FLAGS}>" diff --git a/cpp/cmake/modules/generate_header.cmake b/cpp/cmake/modules/generate_header.cmake index f9f3e09439..a095d76775 100644 --- a/cpp/cmake/modules/generate_header.cmake +++ b/cpp/cmake/modules/generate_header.cmake @@ -59,4 +59,4 @@ endforeach() # Create a stamp file to indicate completion file(WRITE "${STAMP_FILE}" "Headers generated: ${generated_headers}") list(LENGTH generated_headers num_headers) -message(STATUS "Generated ${num_headers} individual FATBIN headers") +message(VERBOSE "Generated ${num_headers} individual FATBIN headers") diff --git a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake index 7fd82f8ac7..0a7049afe3 100644 --- a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake +++ b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake @@ -18,14 +18,14 @@ function(generate_interleaved_scan_kernels) set(KERNEL_LIST_FILE ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt) set(GENERATOR_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py) - set(OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels) - set(CMAKE_LIST_FILE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/jit_lto_kernels_list/interleaved_scan.cmake) + set(OUTPUT_BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated_kernels) + set(CMAKE_LIST_FILE ${OUTPUT_BASE_DIR}/interleaved_scan.cmake) set(STAMP_FILE ${CMAKE_CURRENT_BINARY_DIR}/kernels_generated.stamp) # Generate the kernels at build time add_custom_command( OUTPUT ${STAMP_FILE} - COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} + COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} ${OUTPUT_BASE_DIR} COMMAND ${CMAKE_COMMAND} -E touch ${STAMP_FILE} DEPENDS ${KERNEL_LIST_FILE} ${GENERATOR_SCRIPT} COMMENT "Generating interleaved scan kernel files..." 
@@ -40,9 +40,9 @@ function(generate_interleaved_scan_kernels) # Include the generated CMake list file # Only generate if the CMake list file doesn't exist if(NOT EXISTS ${CMAKE_LIST_FILE}) - message(STATUS "Generating interleaved scan kernels for the first time...") + message(VERBOSE "Generating interleaved scan kernels for the first time...") execute_process( - COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} + COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} ${OUTPUT_BASE_DIR} RESULT_VARIABLE GENERATION_RESULT OUTPUT_VARIABLE GENERATION_OUTPUT ERROR_VARIABLE GENERATION_ERROR @@ -56,28 +56,35 @@ function(generate_interleaved_scan_kernels) # Include the generated CMake file include(${CMAKE_LIST_FILE}) - # Prepend the source directory path to all kernel files + # Prepend the binary directory path to all kernel files set(FULL_PATH_KERNEL_FILES) foreach(kernel_file ${INTERLEAVED_SCAN_KERNEL_FILES}) - list(APPEND FULL_PATH_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${kernel_file}) + list(APPEND FULL_PATH_KERNEL_FILES ${CMAKE_CURRENT_BINARY_DIR}/${kernel_file}) endforeach() - # Prepend the source directory path to all metric device function files + # Prepend the binary directory path to all metric device function files set(FULL_PATH_METRIC_FILES) foreach(metric_file ${METRIC_DEVICE_FUNCTION_FILES}) - list(APPEND FULL_PATH_METRIC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${metric_file}) + list(APPEND FULL_PATH_METRIC_FILES ${CMAKE_CURRENT_BINARY_DIR}/${metric_file}) endforeach() - # Prepend the source directory path to all filter device function files + # Prepend the binary directory path to all filter device function files set(FULL_PATH_FILTER_FILES) foreach(filter_file ${FILTER_DEVICE_FUNCTION_FILES}) - list(APPEND FULL_PATH_FILTER_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${filter_file}) + list(APPEND FULL_PATH_FILTER_FILES ${CMAKE_CURRENT_BINARY_DIR}/${filter_file}) + endforeach() + + # Prepend the binary directory path to all post lambda device function files + 
set(FULL_PATH_POST_LAMBDA_FILES) + foreach(post_lambda_file ${POST_LAMBDA_DEVICE_FUNCTION_FILES}) + list(APPEND FULL_PATH_POST_LAMBDA_FILES ${CMAKE_CURRENT_BINARY_DIR}/${post_lambda_file}) endforeach() # Return the lists to parent scope set(INTERLEAVED_SCAN_KERNEL_FILES ${FULL_PATH_KERNEL_FILES} PARENT_SCOPE) set(METRIC_DEVICE_FUNCTION_FILES ${FULL_PATH_METRIC_FILES} PARENT_SCOPE) set(FILTER_DEVICE_FUNCTION_FILES ${FULL_PATH_FILTER_FILES} PARENT_SCOPE) + set(POST_LAMBDA_DEVICE_FUNCTION_FILES ${FULL_PATH_POST_LAMBDA_FILES} PARENT_SCOPE) set(INTERLEAVED_SCAN_KERNELS_STAMP ${STAMP_FILE} PARENT_SCOPE) set(INTERLEAVED_SCAN_KERNELS_TARGET generate_interleaved_scan_kernels_target PARENT_SCOPE) endfunction() diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h index d34229073b..3092180444 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h @@ -34,36 +34,6 @@ struct FragmentEntry { std::string compute_key{}; }; -// struct FragmentEntryHash { -// using is_transparent = void; - -// std::size_t operator()(std::unique_ptr const& entry) const noexcept { -// return std::hash{}(entry->compute_key); -// } -// std::size_t operator()(FragmentEntry const* entry) const noexcept { -// return std::hash{}(entry->compute_key); -// } -// std::size_t operator()(std::vector const& params) const noexcept; -// }; - -// struct FragmentEntryEqual { -// using is_transparent = void; - -// template -// bool operator()(T const& t, U const& u) const { -// return std::to_address(t) == std::to_address(u); -// } - -// bool operator()(std::unique_ptr const& entry, -// std::string const& params) const noexcept -// { -// return this->operator()(params, entry); -// } - -// bool operator()(std::string const& params, -// std::unique_ptr const& entry) const noexcept; -// }; - struct FatbinFragmentEntry final : FragmentEntry { FatbinFragmentEntry(std::string const& params, unsigned char const* 
view, std::size_t size); diff --git a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h index e35c5c6c62..d7f37e012d 100644 --- a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h +++ b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h @@ -21,7 +21,6 @@ #include namespace detail { -std::string nvrtc_name(std::type_info const& info); template std::string type_as_string() diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index c2ac186467..d3aecac5d8 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -18,56 +18,11 @@ #include -AlgorithmLauncher::AlgorithmLauncher(CUlibrary l, CUkernel k) : library{l}, kernel{k} -{ - // // Validate that we have a valid kernel - // if (kernel == nullptr) { - // std::cerr << "ERROR: AlgorithmLauncher constructed with null kernel" << std::endl; - // } - // if (library == nullptr) { - // std::cerr << "ERROR: AlgorithmLauncher constructed with null library" << std::endl; - // } - // std::cout << "AlgorithmLauncher constructed with kernel: " << kernel << ", library: " << - // library - // << std::endl; -} +AlgorithmLauncher::AlgorithmLauncher(CUlibrary l, CUkernel k) : library{l}, kernel{k} {} void AlgorithmLauncher::call( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) { - // std::cout << "In the launcher" << std::endl; - - // // Validate inputs - // if (kernel == nullptr) { - // std::cerr << "ERROR: Cannot launch null kernel" << std::endl; - // return; - // } - - // if (grid.x == 0 || grid.y == 0 || grid.z == 0) { - // std::cerr << "ERROR: Invalid grid dimensions: " << grid.x << "x" << grid.y << "x" << grid.z - // << std::endl; - // return; - // } - - // if (block.x == 0 || block.y == 0 || block.z == 0) { - // std::cerr << "ERROR: Invalid block dimensions: " << block.x << "x" << block.y << "x" << - // block.z - // << std::endl; - // 
return; - // } - - // std::cout << "Grid: " << grid.x << "x" << grid.y << "x" << grid.z << ", Block: " << block.x << - // "x" - // << block.y << "x" << block.z << ", Shared mem: " << shared_mem << std::endl; - - // // Debug kernel arguments - // if (kernel_args != nullptr) { - // std::cout << "Kernel arguments pointer: " << kernel_args << std::endl; - // // Note: We can't safely dereference kernel_args without knowing the types, - // // but we can at least check if the pointer is valid - // } else { - // std::cout << "WARNING: kernel_args is null" << std::endl; - // } CUlaunchAttribute attribute[1]; attribute[0].id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION; attribute[0].value.programmaticStreamSerializationAllowed = 1; @@ -84,130 +39,7 @@ void AlgorithmLauncher::call( config.attrs = attribute; config.numAttrs = 1; - // std::cout << "Launching kernel" << std::endl; - - // // Check CUDA context - // CUcontext ctx; - // CUresult ctx_result = cuCtxGetCurrent(&ctx); - // if (ctx_result != CUDA_SUCCESS) { - // std::cerr << "ERROR: No active CUDA context. 
Error: " << ctx_result << std::endl; - // return; - // } - - // // Check stream validity - // if (stream == nullptr) { - // std::cerr << "ERROR: Stream is null" << std::endl; - // return; - // } - // std::cout << "Stream: " << stream << std::endl; - - // // Check device properties for debugging - // int device; - // cudaGetDevice(&device); - // cudaDeviceProp prop; - // cudaGetDeviceProperties(&prop, device); - // std::cout << "Device: " << device << " (" << prop.name << ")" << std::endl; - // std::cout << "Max threads per block: " << prop.maxThreadsPerBlock << std::endl; - // std::cout << "Max shared memory per block: " << prop.sharedMemPerBlock << " bytes" << - // std::endl; - - // // Check if our launch parameters are within limits - // int total_threads = block.x * block.y * block.z; - // if (total_threads > prop.maxThreadsPerBlock) { - // std::cerr << "ERROR: Block size exceeds max threads per block (" << total_threads << " > " - // << prop.maxThreadsPerBlock << ")" << std::endl; - // return; - // } - - // if (shared_mem > prop.sharedMemPerBlock) { - // std::cerr << "ERROR: Shared memory exceeds max per block (" << shared_mem << " > " - // << prop.sharedMemPerBlock << ")" << std::endl; - // return; - // } - - // // Launch kernel and check for errors - // std::cout << "About to launch kernel with cuLaunchKernelEx..." << std::endl; - // CUresult launch_result = cuLaunchKernelEx(&config, (CUfunction)kernel, kernel_args, 0); - // if (launch_result != CUDA_SUCCESS) { - // std::cerr << "ERROR: Kernel launch failed with error: " << launch_result << std::endl; - // std::cerr << "This suggests the kernel function is invalid or there's a parameter issue" - // << std::endl; - // return; - // } - // std::cout << "cuLaunchKernelEx returned successfully" << std::endl; - - // std::cout << "Kernel launched successfully, synchronizing stream..." 
<< std::endl; - - // // Check for CUDA runtime errors before synchronization - // cudaError_t cuda_err = cudaGetLastError(); - // if (cuda_err != cudaSuccess) { - // std::cerr << "ERROR: CUDA error before sync: " << cudaGetErrorString(cuda_err) << std::endl; - // return; - // } - - // Add timeout mechanism for debugging - // std::cout << "Starting stream synchronization (this may hang if kernel is stuck)..." << - // std::endl; - - // Try to get stream status first - // cudaStreamQuery(stream); - // cuda_err = cudaGetLastError(); - // if (cuda_err != cudaSuccess && cuda_err != cudaErrorNotReady) { - // std::cerr << "ERROR: Stream query failed: " << cudaGetErrorString(cuda_err) << std::endl; - // return; - // } - - // std::cout << "Stream query completed, proceeding with synchronization..." << std::endl; - - // // Let's try a different approach - check if the kernel is actually running - // std::cout << "About to call cudaStreamSynchronize - this is where it hangs..." << std::endl; - - // // First, let's try to see if we can get any information about the kernel - // std::cout << "Checking kernel function pointer: " << kernel << std::endl; - - // // Try to get kernel attributes - // int max_threads = 0; - // CUresult attr_result = - // cuFuncGetAttribute(&max_threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, - // (CUfunction)kernel); - // if (attr_result == CUDA_SUCCESS) { - // std::cout << "Kernel function appears to be valid, max threads per block: " << max_threads - // << std::endl; - // } else { - // std::cerr << "WARNING: Could not get kernel attributes, error: " << attr_result << std::endl; - // std::cerr << "This suggests the kernel function might be invalid!" 
<< std::endl; - // } - - // // Try to get kernel name - // const char* kernel_name = nullptr; - // CUresult name_result = cuFuncGetName(&kernel_name, (CUfunction)kernel); - // if (name_result == CUDA_SUCCESS && kernel_name != nullptr) { - // std::cout << "Kernel name: " << kernel_name << std::endl; - // } else { - // std::cerr << "WARNING: Could not get kernel name, error: " << name_result << std::endl; - // } - - // // Now try the synchronization - this is where it hangs - // std::cout << "Calling cudaStreamSynchronize now..." << std::endl; - - // // Try using CUDA Driver API instead of runtime API - // CUstream cu_stream = (CUstream)stream; - // CUresult sync_result = cuStreamSynchronize(cu_stream); - // if (sync_result != CUDA_SUCCESS) { - // std::cerr << "ERROR: cuStreamSynchronize failed with error: " << sync_result << std::endl; - // return; - // } - // std::cout << "cuStreamSynchronize returned successfully!" << std::endl; - - // // Check for errors after synchronization - // cuda_err = cudaGetLastError(); - // if (cuda_err != cudaSuccess) { - // std::cerr << "ERROR: CUDA error after sync: " << cudaGetErrorString(cuda_err) << std::endl; - // return; - // } - - // std::cout << "Launched kernel" << std::endl; } std::unordered_map& get_cached_launchers() diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 44b6914112..f7c7a63468 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -51,8 +51,6 @@ void AlgorithmPlanner::add_entrypoint() { auto entrypoint_fragment = fragment_database().get_fragment(this->entrypoint); this->fragments.push_back(entrypoint_fragment); - std::cout << "Fragment added with key: " << fragments.back()->compute_key << std::endl; - std::cout << "Fragments size: " << this->fragments.size() << std::endl; } void AlgorithmPlanner::add_device_functions() @@ -60,8 +58,6 @@ void AlgorithmPlanner::add_device_functions() for (const auto& 
device_function_key : this->device_functions) { auto device_function_fragment = fragment_database().get_fragment(device_function_key); this->fragments.push_back(device_function_fragment); - std::cout << "Fragment added with key: " << fragments.back()->compute_key << std::endl; - std::cout << "Fragments size: " << this->fragments.size() << std::endl; } } @@ -84,7 +80,6 @@ AlgorithmLauncher AlgorithmPlanner::get_launcher() add_device_functions(); launchers[launch_key] = this->build(); } - std::cout << "launcher key: " << launch_key << std::endl; return launchers[launch_key]; } @@ -111,10 +106,8 @@ AlgorithmLauncher AlgorithmPlanner::build() // Call to nvJitLinkComplete causes linker to link together all the LTO-IR // modules perform any optimizations and generate cubin from it. - std::cout << "\tStarted LTO runtime linking \n"; result = nvJitLinkComplete(handle); check_nvjitlink_result(handle, result); - std::cout << "\tCompleted LTO runtime linking \n"; // get cubin from nvJitLink size_t cubin_size; diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index 0bdc7cdc88..493405a170 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -39,17 +39,10 @@ FragmentDatabase& fragment_database() FragmentEntry* FragmentDatabase::get_fragment(std::string const& key) { - std::cout << "Saving compute" << std::endl; auto& db = fragment_database(); - std::cout << "DB size: " << db.cache.size() << std::endl; - std::cout << "Available keys in cache:" << std::endl; - for (const auto& pair : db.cache) { - std::cout << " " << pair.first << std::endl; - } - std::cout << "Finding key: " << key << std::endl; auto val = db.cache.find(key); if (val == db.cache.end()) { - std::cout << "Key not found" << std::endl; + std::cout << "FragmentDatabase: Key not found" << std::endl; return nullptr; } return val->second.get(); @@ -63,6 +56,5 @@ void registerFatbinFragment(std::string const& algo, 
auto& planner = fragment_database(); auto entry_exists = planner.make_cache_entry(algo, params); if (entry_exists) { return; } - std::cout << "Caching fatbin fragment: " << algo + "_" + params << std::endl; planner.cache[algo + "_" + params] = std::make_unique(params, blob, size); } diff --git a/cpp/src/detail/jit_lto/FragmentEntry.cu b/cpp/src/detail/jit_lto/FragmentEntry.cu index b76df1bd29..5924974faa 100644 --- a/cpp/src/detail/jit_lto/FragmentEntry.cu +++ b/cpp/src/detail/jit_lto/FragmentEntry.cu @@ -20,13 +20,6 @@ #include namespace { -// std::string make_compute_key(std::vector const& params) { -// std::string k{}; -// for (auto& p : params) { -// k += p + "_"; -// } -// return k; -// } // We can make a better RAII wrapper around nvjitlinkhandle void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) @@ -49,21 +42,6 @@ void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) FragmentEntry::FragmentEntry(std::string const& params) : compute_key(params) {} -// std::size_t FragmentEntryHash::operator()( -// std::vector const& params) const noexcept { -// return std::hash{}(make_compute_key(params)); -// } - -// bool FragmentEntryEqual::operator()( -// std::vector const& params, -// std::unique_ptr const& entry) const noexcept { -// if (params.size() == entry->compute_arg_count) { -// auto key = make_compute_key(params); -// return entry->compute_key == key; -// } -// return false; -// } - FatbinFragmentEntry::FatbinFragmentEntry(std::string const& params, unsigned char const* view, std::size_t size) @@ -76,12 +54,6 @@ bool FatbinFragmentEntry::add_to(nvJitLinkHandle& handle) const auto result = nvJitLinkAddData( handle, NVJITLINK_INPUT_ANY, this->data_view, this->data_size, this->compute_key.c_str()); - // Loading from file works - // So the issue is in our data_view / data_size - // auto result = nvJitLinkAddFile( - // handle, NVJITLINK_INPUT_ANY, - // 
"/home/rmaynard/Work/runtime_lto_examples/build/algorithms/CMakeFiles/" - // "algo_kernels.dir/kernels/sum_int32.fatbin"); check_nvjitlink_result(handle, result); return true; } diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index 9ddadb3ecf..596c0dcea3 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -118,17 +118,12 @@ constexpr auto get_filter_name() } } -template -constexpr auto get_post_lambda_tag() +template +constexpr auto get_post_lambda_name() { - using namespace raft; - - if constexpr (std::is_same_v) { return tag_post_identity{}; } - if constexpr (std::is_same_v) { return tag_post_sqrt{}; } - if constexpr (std::is_same_v, raft::mul_const_op>>) { - return tag_post_compose{}; - } + if constexpr (std::is_same_v) { return "post_identity"; } + if constexpr (std::is_same_v) { return "post_sqrt"; } + if constexpr (std::is_same_v) { return "post_compose"; } } /** @@ -162,9 +157,8 @@ template -void launch_kernel(PostLambda post_process, - const index& index, + typename PostLambdaTag> +void launch_kernel(const index& index, const T* queries, const uint32_t* coarse_index, const uint32_t num_queries, @@ -196,16 +190,21 @@ void launch_kernel(PostLambda post_process, // PostLambda>; // Use tag types for the planner to avoid template bloat + auto start_time = std::chrono::high_resolution_clock::now(); auto kernel_planner = InterleavedScanPlanner()), decltype(get_acc_type_tag()), - decltype(get_idx_type_tag()), - decltype(get_post_lambda_tag())>( + decltype(get_idx_type_tag())>( Capacity, Veclen, Ascending, ComputeNorm); kernel_planner.template add_metric_device_function()), decltype(get_acc_type_tag())>( get_metric_name(), Veclen); kernel_planner.add_filter_device_function(get_filter_name()); + kernel_planner.add_post_lambda_device_function(get_post_lambda_name()); auto kernel_launcher = 
kernel_planner.get_launcher(); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end_time - start_time); + std::cout << "Time taken to get kernel launcher: " << duration.count() << " microseconds" + << std::endl; const int max_query_smem = 16384; int query_smem_elems = std::min(max_query_smem / sizeof(T), @@ -261,7 +260,6 @@ void launch_kernel(PostLambda post_process, grid_dim, block_dim, smem_size, - post_process, query_smem_elems, queries, coarse_index, @@ -315,7 +313,7 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... arg IdxT, IvfSampleFilterTag, tag_metric_euclidean, - raft::identity_op>({}, std::forward(args)...); + tag_post_identity>(std::forward(args)...); case cuvs::distance::DistanceType::L2SqrtExpanded: case cuvs::distance::DistanceType::L2SqrtUnexpanded: return launch_kernel, - raft::sqrt_op>({}, std::forward(args)...); + tag_post_sqrt>(std::forward(args)...); case cuvs::distance::DistanceType::InnerProduct: return launch_kernel, - raft::identity_op>({}, std::forward(args)...); + tag_post_identity>(std::forward(args)...); case cuvs::distance::DistanceType::CosineExpanded: // NB: "Ascending" is reversed because the post-processing step is done after that sort return launch_kernel>( - raft::compose_op(raft::add_const_op{1.0f}, raft::mul_const_op{-1.0f}), + tag_metric_inner_product, + tag_post_compose>( std::forward(args)...); // NB: update the description of `knn::ivf_flat::build` when // adding here a new metric. 
default: RAFT_FAIL("The chosen distance metric is not supported (%d)", int(metric)); diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh index 7086bafd15..83f4984319 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh @@ -45,6 +45,9 @@ extern __device__ bool sample_filter(index_t* const* const inds_ptrs, index_t bitset_len, index_t original_nbits); +template +extern __device__ T post_process(T val); + /** * @brief Copy `n` elements per block from one place to another. * @@ -817,11 +820,9 @@ template + typename IdxT> RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) - interleaved_scan_kernel(PostLambda post_process, - const uint32_t query_smem_elems, + interleaved_scan_kernel(const uint32_t query_smem_elems, const T* query, const uint32_t* coarse_index, const T* const* list_data_ptrs, @@ -973,7 +974,7 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) if constexpr (kManageLocalTopK) { __syncthreads(); queue.done(interleaved_scan_kernel_smem); - queue.store(distances, neighbors, post_process); + queue.store(distances, neighbors, [](auto val) { return post_process(val); }); } } diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py index da9a033152..dd9d43c290 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py @@ -95,7 +95,7 @@ def get_final_op_abbreviation(op_str): def generate_filename(params): - """Generate filename from template parameters (WITHOUT metric and filter).""" + """Generate filename from template parameters (WITHOUT metric, filter, and post lambda).""" # params[0]: Capacity (numeric) # params[1]: Veclen (numeric) # params[2]: Ascending (bool) @@ -105,7 +105,7 @@ def generate_filename(params): # 
params[6]: IdxT (type) # params[7]: FilterT (filter type - EXCLUDED from filename) # params[8]: Lambda/MetricTag (metric type - EXCLUDED from filename) - # params[9]: PostLambda (final operator) + # params[9]: PostLambda (final operator - EXCLUDED from filename) parts = [ params[0], # Capacity @@ -117,14 +117,14 @@ def generate_filename(params): get_type_abbreviation(params[6]), # IdxT # params[7] EXCLUDED - filter # params[8] EXCLUDED - metric - get_final_op_abbreviation(params[9]) # PostLambda + # params[9] EXCLUDED - post lambda ] return f"interleaved_scan_kernel_{'_'.join(parts)}.cu" def generate_register_function_name(params): - """Generate the registration function name from template parameters (WITHOUT metric and filter).""" + """Generate the registration function name from template parameters (WITHOUT metric, filter, and post lambda).""" parts = [ params[0], # Capacity params[1], # Veclen @@ -135,7 +135,7 @@ def generate_register_function_name(params): get_type_abbreviation(params[6]), # IdxT # params[7] EXCLUDED - filter # params[8] EXCLUDED - metric - get_final_op_abbreviation(params[9]) # PostLambda + # params[9] EXCLUDED - post lambda ] return f"interleaved_scan_kernel_{'_'.join(parts)}" @@ -237,18 +237,18 @@ def generate_cuda_file_content(params): # params[8]: Lambda (metric - NOT used in template anymore) # params[9]: PostLambda (post-processing operator) - # Template parameters without MetricTag and FilterT (params 0-6, 9) - template_params_list = params[0:7] + [params[9]] + # Template parameters without MetricTag, FilterT, and PostLambda (params 0-6) + template_params_list = params[0:7] template_params = ', '.join(template_params_list) - # Convert params 4-6 and 9 to tag types for registerAlgorithm (NO metric/filter tags) - tag_params = [param_to_tag(i, params[i], params) for i in [4, 5, 6, 9]] + # Convert params 4-6 to tag types for registerAlgorithm (NO metric/filter/postlambda tags) + tag_params = [param_to_tag(i, params[i], params) for i in [4, 
5, 6]] register_template_params = ', '.join(tag_params) # Create the string parameter with first four params (Capacity, Veclen, Ascending, ComputeNorm) string_param = f"interleaved_scan_kernel_{params[0]}_{params[1]}_{params[2]}_{params[3]}" - # Function parameters for the kernel instantiation (updated signature) + # Function parameters for the kernel instantiation (updated signature - PostLambda removed) content = f"""/* * Copyright (c) 2025, NVIDIA CORPORATION. * @@ -267,11 +267,11 @@ def generate_cuda_file_content(params): #ifdef BUILD_KERNEL -#include "../../ivf_flat_interleaved_scan_kernel.cuh" +#include namespace cuvs::neighbors::ivf_flat::detail {{ -template __global__ void interleaved_scan_kernel<{template_params}>({params[9]}, unsigned int, {params[4]} const*, unsigned int const*, {params[4]} const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, {params[6]}* const* const, unsigned int*, {params[6]}, {params[6]}, unsigned int*, float*); +template __global__ void interleaved_scan_kernel<{template_params}>(unsigned int, {params[4]} const*, unsigned int const*, {params[4]} const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, {params[6]}* const* const, unsigned int*, {params[6]}, {params[6]}, unsigned int*, float*); }} // namespace cuvs::neighbors::ivf_flat::detail @@ -279,7 +279,7 @@ def generate_cuda_file_content(params): #include "{filename}.h" #include -#include "../interleaved_scan_tags.hpp" +#include __attribute__((__constructor__)) static void register_{filename}() {{ @@ -330,10 +330,10 @@ def generate_metric_device_function_content(metric_name, veclen, data_type, acc_ # Determine which header to include and implementation struct based on metric if metric_name == 'euclidean': - header_file = '../metric_euclidean_dist.cuh' + header_file = 'neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh' 
metric_impl = 'euclidean_dist' elif metric_name == 'inner_prod': - header_file = '../metric_inner_product.cuh' + header_file = 'neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh' metric_impl = 'inner_prod_dist' else: raise ValueError(f"Unknown metric: {metric_name}") @@ -368,7 +368,7 @@ def generate_metric_device_function_content(metric_name, veclen, data_type, acc_ #include "{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}.h" #include -#include "../interleaved_scan_tags.hpp" +#include __attribute__((__constructor__)) static void register_{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}() {{ @@ -387,9 +387,9 @@ def generate_filter_device_function_content(filter_name): """Generate content for a filter device function file.""" # Determine which header to include based on filter name if filter_name == 'filter_none': - header_file = '../filter_none.cuh' + header_file = 'neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh' elif filter_name == 'filter_bitset': - header_file = '../filter_bitset.cuh' + header_file = 'neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh' else: raise ValueError(f"Unknown filter: {filter_name}") @@ -423,7 +423,7 @@ def generate_filter_device_function_content(filter_name): #include "{filter_name}.h" #include -#include "../interleaved_scan_tags.hpp" +#include __attribute__((__constructor__)) static void register_{filter_name}() {{ @@ -438,7 +438,7 @@ def generate_filter_device_function_content(filter_name): return content -def generate_metric_device_functions(script_dir): +def generate_metric_device_functions(script_dir, output_base_dir): """Generate all metric device function files.""" # Define all combinations we need # Based on the kernel signatures, we have: @@ -457,7 +457,7 @@ def generate_metric_device_functions(script_dir): veclens = [1, 2, 4, 8, 16] metrics = ['euclidean', 'inner_prod'] - output_dir = script_dir / 'metric_device_functions' + output_dir = output_base_dir / 
'metric_device_functions' output_dir.mkdir(parents=True, exist_ok=True) generated_files = [] @@ -495,11 +495,11 @@ def generate_metric_device_functions(script_dir): return generated_files -def generate_filter_device_functions(script_dir): +def generate_filter_device_functions(script_dir, output_base_dir): """Generate all filter device function files.""" filters = ['filter_none', 'filter_bitset'] - output_dir = script_dir / 'filter_device_functions' + output_dir = output_base_dir / 'filter_device_functions' output_dir.mkdir(parents=True, exist_ok=True) generated_files = [] @@ -526,7 +526,97 @@ def generate_filter_device_functions(script_dir): return generated_files +def generate_post_lambda_device_function_content(post_lambda_name): + """Generate content for a post lambda device function file.""" + # Determine which header to include based on post lambda name + if post_lambda_name == 'post_identity': + header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh' + elif post_lambda_name == 'post_sqrt': + header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh' + elif post_lambda_name == 'post_compose': + header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh' + else: + raise ValueError(f"Unknown post lambda: {post_lambda_name}") + + content = f"""/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifdef BUILD_KERNEL + +#include "{header_file}" + +namespace cuvs::neighbors::ivf_flat::detail {{ + +template __device__ float post_process(float val); + +}} // namespace cuvs::neighbors::ivf_flat::detail + +#else + +#include "{post_lambda_name}.h" +#include +#include + +__attribute__((__constructor__)) static void register_{post_lambda_name}() +{{ +using namespace cuvs::neighbors::ivf_flat::detail; +registerAlgorithm("{post_lambda_name}", + embedded_{post_lambda_name}, + sizeof(embedded_{post_lambda_name})); +}} + +#endif +""" + return content + + +def generate_post_lambda_device_functions(script_dir, output_base_dir): + """Generate all post lambda device function files.""" + post_lambdas = ['post_identity', 'post_sqrt', 'post_compose'] + + output_dir = output_base_dir / 'post_lambda_device_functions' + output_dir.mkdir(parents=True, exist_ok=True) + + generated_files = [] + + for post_lambda_name in post_lambdas: + filename = f"{post_lambda_name}.cu" + file_content = generate_post_lambda_device_function_content(post_lambda_name) + + # Write file only if it doesn't exist or content has changed + output_file = output_dir / filename + should_write = True + if output_file.exists(): + with open(output_file, 'r') as f: + existing_content = f.read() + should_write = (existing_content != file_content) + + if should_write: + with open(output_file, 'w') as f: + f.write(file_content) + + generated_files.append(filename) + + print(f"Generated {len(generated_files)} post lambda device function files") + return generated_files + + def main(): + import sys + # Get the script directory to find the kernels file script_dir = Path(__file__).parent.absolute() @@ -539,8 +629,13 @@ def main(): with open(kernels_file, 'r') as f: lines = f.readlines() - # Output directory (interleaved_scan_kernels subdirectory) - output_dir = script_dir / 'interleaved_scan_kernels' + # Output directory - use command line argument if provided, otherwise use source dir + if len(sys.argv) > 1: + 
output_base_dir = Path(sys.argv[1]).absolute() + else: + output_base_dir = script_dir + + output_dir = output_base_dir / 'interleaved_scan_kernels' output_dir.mkdir(parents=True, exist_ok=True) # Parse all kernels and generate files @@ -605,36 +700,45 @@ def main(): print(f"\nGenerated {len(generated_files)} CUDA kernel files") # Generate metric device function files - metric_files = generate_metric_device_functions(script_dir) + metric_files = generate_metric_device_functions(script_dir, output_base_dir) # Generate filter device function files - filter_files = generate_filter_device_functions(script_dir) + filter_files = generate_filter_device_functions(script_dir, output_base_dir) + + # Generate post lambda device function files + post_lambda_files = generate_post_lambda_device_functions(script_dir, output_base_dir) # Generate CMake file with all filenames - # We're generating in the source tree at: cpp/src/neighbors/ivf_flat/jit_lto_kernels/ - # CMake file goes to: cpp/cmake/jit_lto_kernels_list/ - cmake_dir = script_dir.parent.parent.parent.parent / 'cmake' / 'jit_lto_kernels_list' + # CMake file goes to the binary directory + cmake_dir = output_base_dir cmake_dir.mkdir(parents=True, exist_ok=True) cmake_file = cmake_dir / 'interleaved_scan.cmake' # Generate CMake content + # Paths are now relative to CMAKE_CURRENT_BINARY_DIR cmake_content = "# Auto-generated list of interleaved scan kernel files\n" cmake_content += "# Generated by generate_kernels.py\n\n" cmake_content += "set(INTERLEAVED_SCAN_KERNEL_FILES\n" for filename in sorted(generated_files): - cmake_content += f" src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels/{filename}\n" + cmake_content += f" generated_kernels/interleaved_scan_kernels/{filename}\n" cmake_content += ")\n\n" # Add metric device function files cmake_content += "set(METRIC_DEVICE_FUNCTION_FILES\n" for filename in sorted(metric_files): - cmake_content += f" 
src/neighbors/ivf_flat/jit_lto_kernels/metric_device_functions/{filename}\n" + cmake_content += f" generated_kernels/metric_device_functions/{filename}\n" cmake_content += ")\n\n" # Add filter device function files cmake_content += "set(FILTER_DEVICE_FUNCTION_FILES\n" for filename in sorted(filter_files): - cmake_content += f" src/neighbors/ivf_flat/jit_lto_kernels/filter_device_functions/{filename}\n" + cmake_content += f" generated_kernels/filter_device_functions/{filename}\n" + cmake_content += ")\n\n" + + # Add post lambda device function files + cmake_content += "set(POST_LAMBDA_DEVICE_FUNCTION_FILES\n" + for filename in sorted(post_lambda_files): + cmake_content += f" generated_kernels/post_lambda_device_functions/{filename}\n" cmake_content += ")\n" # Only write if content has changed diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp index dbde1bf5f7..e6695fb08f 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -32,24 +32,25 @@ struct InterleavedScanPlanner : AlgorithmPlanner { bool_to_string(ComputeNorm), make_fragment_key()) { - std::cout << "In the planner" << std::endl; } template void add_metric_device_function(std::string metric_name, int Veclen) { - auto& db = fragment_database(); auto key = metric_name + "_" + std::to_string(Veclen); auto params = make_fragment_key(); - std::cout << "Looking for metric fragment: " << key + "_" + params << std::endl; this->device_functions.push_back(key + "_" + params); } void add_filter_device_function(std::string filter_name) { - auto& db = fragment_database(); auto key = filter_name + "_"; - std::cout << "Looking for filter fragment: " << key << std::endl; + this->device_functions.push_back(key); + } + + void add_post_lambda_device_function(std::string post_lambda_name) + { + auto key = 
post_lambda_name + "_"; this->device_functions.push_back(key); } }; diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh new file mode 100644 index 0000000000..ba9b60b238 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cuvs::neighbors::ivf_flat::detail { + +template +__device__ T post_process(T val) +{ + // This is for cosine distance: compose(add_const(1.0), mul_const(-1.0)) + // which computes: 1.0 + (-1.0 * val) = 1.0 - val + return raft::compose_op(raft::add_const_op{1.0f}, raft::mul_const_op{-1.0f})(val); +} + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/detail/jit_lto/MakeFragmentKey.cu b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh similarity index 67% rename from cpp/src/detail/jit_lto/MakeFragmentKey.cu rename to cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh index 020010bf21..7e17bf481d 100644 --- a/cpp/src/detail/jit_lto/MakeFragmentKey.cu +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh @@ -14,15 +14,16 @@ * limitations under the License. 
*/ -#define NVRTC_GET_TYPE_NAME 1 -// #include +#pragma once -#include +#include -namespace detail { -// std::string nvrtc_name(std::type_info const& info) { -// std::string type_name; -// nvrtcGetTypeName(info, &type_name); -// return type_name; -// } -} // namespace detail +namespace cuvs::neighbors::ivf_flat::detail { + +template +__device__ T post_process(T val) +{ + return raft::identity_op{}(val); +} + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh new file mode 100644 index 0000000000..1cfd755445 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace cuvs::neighbors::ivf_flat::detail { + +template +__device__ T post_process(T val) +{ + return raft::sqrt_op{}(val); +} + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/dependencies.yaml b/dependencies.yaml index 623dd6b434..2f0a882107 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -552,13 +552,13 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - nvidia-nvjitlink-cu12==25.10.*,>=0.0.0a0 + - nvidia-nvjitlink-cu12 - matrix: cuda: "13.*" - cuda_suffixed: "true" + cuda_suffixed: "false" packages: - - nvidia-nvjitlink-cu13==25.10.*,>=0.0.0a0 - - {matrix: null, packages: [libnvjitlink-dev]} + - nvidia-nvjitlink=13.* + - {matrix: null, packages: [nvidia-nvjitlink]} depends_on_libraft: common: - output_types: conda diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml index 679a3e4d09..6c30d91153 100644 --- a/python/libcuvs/pyproject.toml +++ b/python/libcuvs/pyproject.toml @@ -31,9 +31,9 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cuda-toolkit[cublas,curand,cusolver,cusparse]>=12,<14", - "libnvjitlink-dev", "libraft==25.12.*,>=0.0.0a0", "librmm==25.12.*,>=0.0.0a0", + "nvidia-nvjitlink", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -91,10 +91,10 @@ regex = "(?P.*)" build-backend = "scikit_build_core.build" requires = [ "cmake>=3.30.4", - "libnvjitlink-dev", "libraft==25.12.*,>=0.0.0a0", "librmm==25.12.*,>=0.0.0a0", "ninja", + "nvidia-nvjitlink", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" From 22680c8d4aff8252190486c528366dcf5bee6bb6 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Oct 2025 20:30:16 +0000 Subject: [PATCH 010/158] don't read hardcoded kernels, use generator properly --- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 3 +- .../ivf_flat/ivf_flat_interleaved_scan.cuh | 46 +- .../jit_lto_kernels/generate_kernels.py | 722 +++------- .../interleaved_scan_kernels.txt | 1280 ----------------- .../jit_lto_kernels/interleaved_scan_tags.hpp | 18 +- .../jit_lto_kernels/metric_euclidean_dist.cuh | 1 - .../jit_lto_kernels/metric_inner_product.cuh | 1 - 7 files changed, 240 insertions(+), 1831 deletions(-) delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index f7c7a63468..7507ed7616 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -72,10 +73,10 @@ std::string AlgorithmPlanner::get_device_functions_key() AlgorithmLauncher AlgorithmPlanner::get_launcher() { - std::cout << "Getting launcher" << std::endl; auto& launchers = get_cached_launchers(); auto launch_key = this->entrypoint + this->get_device_functions_key(); if (launchers.count(launch_key) == 0) { + auto start = std::chrono::high_resolution_clock::now(); add_entrypoint(); add_device_functions(); launchers[launch_key] = this->build(); diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index 596c0dcea3..89acd0cb01 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -42,25 +42,25 @@ using namespace cuvs::spatial::knn::detail; // NOLINT template constexpr auto get_data_type_tag() { - if constexpr (std::is_same_v) { return tag_float{}; } - if constexpr (std::is_same_v) { return tag_half{}; } - if constexpr (std::is_same_v) { return tag_int8{}; } - if constexpr (std::is_same_v) { return tag_uint8{}; } + if constexpr (std::is_same_v) { return tag_f{}; } + if constexpr (std::is_same_v) { return tag_h{}; } + if constexpr (std::is_same_v) { return tag_sc{}; } + if constexpr (std::is_same_v) { return tag_uc{}; } } template constexpr auto get_acc_type_tag() { - if constexpr (std::is_same_v) { return tag_acc_float{}; } - if constexpr (std::is_same_v) { return tag_acc_half{}; } - if constexpr (std::is_same_v) { return tag_acc_int32{}; } - if constexpr (std::is_same_v) { return tag_acc_uint32{}; } + if constexpr (std::is_same_v) { return tag_acc_f{}; } + if constexpr (std::is_same_v) { return tag_acc_h{}; } + if constexpr (std::is_same_v) { return tag_acc_i{}; } + if constexpr (std::is_same_v) { return tag_acc_ui{}; } } template constexpr auto get_idx_type_tag() { - if constexpr (std::is_same_v) { return tag_idx_int64{}; } + if constexpr 
(std::is_same_v) { return tag_idx_l{}; } } template @@ -70,10 +70,10 @@ constexpr auto get_filter_type_tag() // Determine the filter implementation tag if constexpr (std::is_same_v) { - return tag_filter{}; + return tag_filter{}; } if constexpr (std::is_same_v>) { - return tag_filter{}; + return tag_filter{}; } } @@ -108,12 +108,10 @@ constexpr auto get_metric_name() template constexpr auto get_filter_name() { - if constexpr (std::is_same_v>) { + if constexpr (std::is_same_v>) { return "filter_none"; } - if constexpr (std::is_same_v>) { + if constexpr (std::is_same_v>) { return "filter_bitset"; } } @@ -178,19 +176,8 @@ void launch_kernel(const index& index, { RAFT_EXPECTS(Veclen == index.veclen(), "Configured Veclen does not match the index interleaving pattern."); - // constexpr auto kKernel = interleaved_scan_kernel; // Use tag types for the planner to avoid template bloat - auto start_time = std::chrono::high_resolution_clock::now(); auto kernel_planner = InterleavedScanPlanner()), decltype(get_acc_type_tag()), decltype(get_idx_type_tag())>( @@ -201,10 +188,6 @@ void launch_kernel(const index& index, kernel_planner.add_filter_device_function(get_filter_name()); kernel_planner.add_post_lambda_device_function(get_post_lambda_name()); auto kernel_launcher = kernel_planner.get_launcher(); - auto end_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end_time - start_time); - std::cout << "Time taken to get kernel launcher: " << duration.count() << " microseconds" - << std::endl; const int max_query_smem = 16384; int query_smem_elems = std::min(max_query_smem / sizeof(T), @@ -470,8 +453,6 @@ void ivfflat_interleaved_scan(const index& index, { const int capacity = raft::bound_by_power_of_two(k); - // auto filter_adapter = cuvs::neighbors::filtering::ivf_to_sample_filter( - // index.inds_ptrs().data_handle(), sample_filter); cuda::std::optional bitset_ptr; cuda::std::optional bitset_len; cuda::std::optional original_nbits; @@ 
-496,7 +477,6 @@ void ivfflat_interleaved_scan(const index& index, k, max_samples, chunk_indices, - // filter_adapter, index.inds_ptrs().data_handle(), bitset_ptr, bitset_len, diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py index dd9d43c290..a480e6ec4e 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py @@ -12,243 +12,100 @@ # the License. # ============================================================================= - #!/usr/bin/env python3 """ -Simplified script to generate CUDA kernel files for interleaved_scan_kernel instantiations. -Reads from interleaved_scan_kernels.txt and generates individual .cu files. +Generate CUDA kernel instantiation files for IVF-Flat interleaved scan. +This script generates kernel files programmatically based on type combinations. """ -import re -import os from pathlib import Path - - -def parse_template_parameters(template_str): - """Parse template parameters from a template string with nested templates.""" - params = [] - current_param = '' - depth = 0 - - for char in template_str: - if char == '<': - depth += 1 - elif char == '>': - depth -= 1 - elif char == ',' and depth == 0: - params.append(current_param.strip()) - current_param = '' - continue - current_param += char - - if current_param: - params.append(current_param.strip()) - - return params - - -def get_type_abbreviation(type_str): - """Get abbreviation for type names.""" - type_map = { - 'float': 'f', - '__half': 'h', - 'unsigned char': 'uc', - 'signed char': 'sc', - 'unsigned int': 'ui', - 'int': 'i', - 'long': 'l' - } - return type_map.get(type_str, type_str) - - -def get_filter_abbreviation(filter_str): - """Get abbreviation for filter types.""" - if 'none_sample_filter' in filter_str: - return 'n' - elif 'bitset_filter' in filter_str: - return 'b' - return 'unknown' - - -def 
get_distance_abbreviation(dist_str): - """Get abbreviation for distance metric types.""" - if 'inner_prod_dist' in dist_str: - match = re.search(r'inner_prod_dist<(\d+),', dist_str) - if match: - return f'inner_{match.group(1)}' - elif 'euclidean_dist' in dist_str: - match = re.search(r'euclidean_dist<(\d+),', dist_str) - if match: - return f'euclidean_{match.group(1)}' - return 'unknown' - - -def get_final_op_abbreviation(op_str): - """Get abbreviation for final operator types.""" - if 'identity_op' in op_str: - return 'id' - elif 'sqrt_op' in op_str: - return 'sqrt' - elif 'compose_op' in op_str: - return 'compose' - return 'unknown' +import itertools + + +# Define the parameter space for kernel generation +CAPACITIES = [0, 1, 2, 4, 8, 16, 32, 64, 128, 256] +ASCENDING_VALUES = [True, False] +COMPUTE_NORM_VALUES = [True, False] + +# Data type configurations: (data_type, acc_type, veclens, type_abbrev, acc_abbrev) +# Each data type has veclen=1 and one optimized larger veclen +DATA_TYPE_CONFIGS = [ + ('float', 'float', [1, 4], 'f', 'f'), + ('__half', '__half', [1, 8], 'h', 'h'), + ('uint8_t', 'uint32_t', [1, 16], 'uc', 'ui'), + ('int8_t', 'int32_t', [1, 16], 'sc', 'i'), +] + +IDX_TYPE = 'int64_t' +IDX_TYPE_ABBREV = 'l' + +# Metric configurations for device functions +METRIC_CONFIGS = [ + 'euclidean', + 'inner_prod', +] + +# Filter configurations +FILTER_CONFIGS = [ + 'filter_none', + 'filter_bitset', +] + +# Post lambda configurations +POST_LAMBDA_CONFIGS = [ + 'post_identity', + 'post_sqrt', + 'post_compose', +] + + +def generate_kernel_combinations(): + """Generate all valid kernel parameter combinations.""" + kernels = [] + + for data_type, acc_type, veclens, type_abbrev, acc_abbrev in DATA_TYPE_CONFIGS: + for capacity, veclen, ascending, compute_norm in itertools.product( + CAPACITIES, veclens, ASCENDING_VALUES, COMPUTE_NORM_VALUES + ): + kernels.append({ + 'capacity': capacity, + 'veclen': veclen, + 'ascending': ascending, + 'compute_norm': compute_norm, + 
'data_type': data_type, + 'acc_type': acc_type, + 'idx_type': IDX_TYPE, + 'type_abbrev': type_abbrev, + 'acc_abbrev': acc_abbrev, + 'idx_abbrev': IDX_TYPE_ABBREV, + }) + + return kernels def generate_filename(params): - """Generate filename from template parameters (WITHOUT metric, filter, and post lambda).""" - # params[0]: Capacity (numeric) - # params[1]: Veclen (numeric) - # params[2]: Ascending (bool) - # params[3]: ComputeNorm (bool) - # params[4]: T (type) - # params[5]: AccT (type) - # params[6]: IdxT (type) - # params[7]: FilterT (filter type - EXCLUDED from filename) - # params[8]: Lambda/MetricTag (metric type - EXCLUDED from filename) - # params[9]: PostLambda (final operator - EXCLUDED from filename) - - parts = [ - params[0], # Capacity - params[1], # Veclen - params[2], # Ascending - params[3], # ComputeNorm - get_type_abbreviation(params[4]), # T - get_type_abbreviation(params[5]), # AccT - get_type_abbreviation(params[6]), # IdxT - # params[7] EXCLUDED - filter - # params[8] EXCLUDED - metric - # params[9] EXCLUDED - post lambda - ] - - return f"interleaved_scan_kernel_{'_'.join(parts)}.cu" - - -def generate_register_function_name(params): - """Generate the registration function name from template parameters (WITHOUT metric, filter, and post lambda).""" - parts = [ - params[0], # Capacity - params[1], # Veclen - params[2], # Ascending - params[3], # ComputeNorm - get_type_abbreviation(params[4]), # T - get_type_abbreviation(params[5]), # AccT - get_type_abbreviation(params[6]), # IdxT - # params[7] EXCLUDED - filter - # params[8] EXCLUDED - metric - # params[9] EXCLUDED - post lambda - ] - - return f"interleaved_scan_kernel_{'_'.join(parts)}" - - -def param_to_tag(param_index, param_value, all_params): - """Convert a parameter to its corresponding tag type. 
- - param_index: Index of the parameter (0-9) - param_value: The actual parameter value (C++ type string) - all_params: All 10 parameters (needed for templated tags) - """ - # Data type (param 4: T) - if param_index == 4: - type_map = { - 'float': 'tag_float', - '__half': 'tag_half', - 'int8_t': 'tag_int8', - 'uint8_t': 'tag_uint8', - 'signed char': 'tag_int8', - 'unsigned char': 'tag_uint8' - } - return type_map.get(param_value, param_value) - - # Accumulator type (param 5: AccT) - elif param_index == 5: - acc_map = { - 'float': 'tag_acc_float', - '__half': 'tag_acc_half', - 'int32_t': 'tag_acc_int32', - 'uint32_t': 'tag_acc_uint32', - 'int': 'tag_acc_int32', - 'unsigned int': 'tag_acc_uint32', - 'signed int': 'tag_acc_int32' - } - return acc_map.get(param_value, param_value) - - # Index type (param 6: IdxT) - always int64_t - elif param_index == 6: - return 'tag_idx_int64' - - # Sample filter type (param 7: IvfSampleFilterT) - elif param_index == 7: - # Get the IdxT tag - idx_tag = param_to_tag(6, all_params[6], all_params) - - # Determine filter implementation tag - if 'bitset_filter' in param_value: - filter_impl_tag = 'tag_filter_bitset_impl' - elif 'none_sample_filter' in param_value: - filter_impl_tag = 'tag_filter_none_impl' - else: - filter_impl_tag = 'tag_filter_none_impl' - - # Return templated tag_filter with tag types - return f'tag_filter<{idx_tag}, {filter_impl_tag}>' - - # Distance metric (param 8: Lambda) - elif param_index == 8: - # Extract veclen from the Lambda type - veclen_match = re.search(r'<(\d+),', param_value) - veclen = veclen_match.group(1) if veclen_match else all_params[1] - - # Get tags for T and AccT - T_tag = param_to_tag(4, all_params[4], all_params) - AccT_tag = param_to_tag(5, all_params[5], all_params) - - # Return templated tag based on metric type - if 'euclidean_dist' in param_value: - return f'tag_metric_euclidean<{veclen}, {T_tag}, {AccT_tag}>' - elif 'inner_prod_dist' in param_value: - return 
f'tag_metric_inner_product<{veclen}, {T_tag}, {AccT_tag}>' - return param_value - - # Post-processing lambda (param 9: PostLambda) - elif param_index == 9: - if 'identity_op' in param_value: - return 'tag_post_identity' - elif 'sqrt_op' in param_value: - return 'tag_post_sqrt' - elif 'compose_op' in param_value: - return 'tag_post_compose' - return param_value - - return param_value + """Generate filename from kernel parameters.""" + capacity = params['capacity'] + veclen = params['veclen'] + ascending = 'true' if params['ascending'] else 'false' + compute_norm = 'true' if params['compute_norm'] else 'false' + type_abbrev = params['type_abbrev'] + acc_abbrev = params['acc_abbrev'] + idx_abbrev = params['idx_abbrev'] + + return f"interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{type_abbrev}_{acc_abbrev}_{idx_abbrev}.cu" def generate_cuda_file_content(params): - """Generate the content of a CUDA kernel file.""" - filename = generate_register_function_name(params) - embedded_var_name = f"embedded_{filename}" - - # The kernel now has 8 template parameters (removed MetricTag and FilterT) - # params[0-3]: Capacity, Veclen, Ascending, ComputeNorm - # params[4]: T (data type) - # params[5]: AccT (accumulator type) - # params[6]: IdxT (index type) - # params[7]: IvfSampleFilterT (filter type - NOT used in template anymore) - # params[8]: Lambda (metric - NOT used in template anymore) - # params[9]: PostLambda (post-processing operator) - - # Template parameters without MetricTag, FilterT, and PostLambda (params 0-6) - template_params_list = params[0:7] - template_params = ', '.join(template_params_list) - - # Convert params 4-6 to tag types for registerAlgorithm (NO metric/filter/postlambda tags) - tag_params = [param_to_tag(i, params[i], params) for i in [4, 5, 6]] - register_template_params = ', '.join(tag_params) - - # Create the string parameter with first four params (Capacity, Veclen, Ascending, ComputeNorm) - string_param = 
f"interleaved_scan_kernel_{params[0]}_{params[1]}_{params[2]}_{params[3]}" - - # Function parameters for the kernel instantiation (updated signature - PostLambda removed) + """Generate the content of a CUDA kernel instantiation file.""" + capacity = params['capacity'] + veclen = params['veclen'] + ascending = 'true' if params['ascending'] else 'false' + compute_norm = 'true' if params['compute_norm'] else 'false' + data_type = params['data_type'] + acc_type = params['acc_type'] + idx_type = params['idx_type'] + content = f"""/* * Copyright (c) 2025, NVIDIA CORPORATION. * @@ -265,78 +122,54 @@ def generate_cuda_file_content(params): * limitations under the License. */ +// This file is auto-generated. Do not edit manually. + #ifdef BUILD_KERNEL #include namespace cuvs::neighbors::ivf_flat::detail {{ -template __global__ void interleaved_scan_kernel<{template_params}>(unsigned int, {params[4]} const*, unsigned int const*, {params[4]} const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, {params[6]}* const* const, unsigned int*, {params[6]}, {params[6]}, unsigned int*, float*); +// Instantiate the kernel template +template __global__ void interleaved_scan_kernel<{capacity}, {veclen}, {ascending}, {compute_norm}, {data_type}, {acc_type}, {idx_type}>( + const uint32_t, const {data_type}*, const uint32_t*, const {data_type}* const*, const uint32_t*, + const uint32_t, const uint32_t, const uint32_t, const uint32_t, const uint32_t*, const uint32_t, + {idx_type}* const* const, uint32_t*, {idx_type}, {idx_type}, uint32_t*, float*); }} // namespace cuvs::neighbors::ivf_flat::detail #else -#include "{filename}.h" #include #include +#include "interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params['type_abbrev']}_{params['acc_abbrev']}_{params['idx_abbrev']}.h" -__attribute__((__constructor__)) static void register_{filename}() -{{ using namespace cuvs::neighbors::ivf_flat::detail; 
-registerAlgorithm< - {register_template_params}>("{string_param}", - {embedded_var_name}, - sizeof({embedded_var_name})); + +__attribute__((__constructor__)) static void register_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params['type_abbrev']}_{params['acc_abbrev']}_{params['idx_abbrev']}() +{{ + registerAlgorithm( + "interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}", + embedded_interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params['type_abbrev']}_{params['acc_abbrev']}_{params['idx_abbrev']}, + sizeof(embedded_interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params['type_abbrev']}_{params['acc_abbrev']}_{params['idx_abbrev']})); }} #endif """ - return content def generate_metric_device_function_content(metric_name, veclen, data_type, acc_type): """Generate content for a metric device function file.""" - # Map types to their tag equivalents - # Mapping for data types (T) - data_type_to_tag = { - 'float': 'tag_float', - '__half': 'tag_half', - 'int8_t': 'tag_int8', - 'uint8_t': 'tag_uint8', - } - - # Mapping for accumulator types (AccT) - acc_type_to_tag = { - 'float': 'tag_acc_float', - '__half': 'tag_acc_half', - 'int32_t': 'tag_acc_int32', - 'uint32_t': 'tag_acc_uint32', - } - - # Get abbreviated names for filename - type_abbrev = { - 'float': 'f', - '__half': 'h', - 'int8_t': 'i8', - 'uint8_t': 'u8', - 'int32_t': 'i32', - 'uint32_t': 'u32', - } - - data_tag = data_type_to_tag.get(data_type, data_type) - acc_tag = acc_type_to_tag.get(acc_type, acc_type) - - # Determine which header to include and implementation struct based on metric + type_abbrev = {'float': 'f', '__half': 'h', 'uint8_t': 'uc', 'int8_t': 'sc'}[data_type] + acc_abbrev = {'float': 'f', '__half': 'h', 'uint32_t': 'ui', 'int32_t': 'i'}[acc_type] + if metric_name == 'euclidean': header_file = 'neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh' - metric_impl = 'euclidean_dist' - elif metric_name == 
'inner_prod': + else: # inner_prod header_file = 'neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh' - metric_impl = 'inner_prod_dist' - else: - raise ValueError(f"Unknown metric: {metric_name}") content = f"""/* * Copyright (c) 2025, NVIDIA CORPORATION. @@ -354,28 +187,34 @@ def generate_metric_device_function_content(metric_name, veclen, data_type, acc_ * limitations under the License. */ +// This file is auto-generated. Do not edit manually. + #ifdef BUILD_KERNEL -#include "{header_file}" +#include <{header_file}> namespace cuvs::neighbors::ivf_flat::detail {{ +// Instantiate the device function template template __device__ void compute_dist<{veclen}, {data_type}, {acc_type}>({acc_type}&, {acc_type}, {acc_type}); }} // namespace cuvs::neighbors::ivf_flat::detail #else -#include "{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}.h" #include #include +#include "metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}.h" -__attribute__((__constructor__)) static void register_{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}() -{{ using namespace cuvs::neighbors::ivf_flat::detail; -registerAlgorithm<{data_tag}, {acc_tag}>("{metric_name}_{veclen}", - embedded_{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}, - sizeof(embedded_{metric_name}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]})); + +__attribute__((__constructor__)) static void register_metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}() +{{ + registerAlgorithm( + "{metric_name}_{veclen}", + embedded_metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}, + sizeof(embedded_metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev})); }} #endif @@ -385,13 +224,10 @@ def generate_metric_device_function_content(metric_name, veclen, data_type, acc_ def generate_filter_device_function_content(filter_name): """Generate content for a filter device function file.""" - # Determine which header to include based on filter name 
if filter_name == 'filter_none': header_file = 'neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh' - elif filter_name == 'filter_bitset': + else: # filter_bitset header_file = 'neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh' - else: - raise ValueError(f"Unknown filter: {filter_name}") content = f"""/* * Copyright (c) 2025, NVIDIA CORPORATION. @@ -409,28 +245,30 @@ def generate_filter_device_function_content(filter_name): * limitations under the License. */ +// This file is auto-generated. Do not edit manually. + #ifdef BUILD_KERNEL -#include "{header_file}" +#include <{header_file}> namespace cuvs::neighbors::ivf_flat::detail {{ -template __device__ bool sample_filter(int64_t* const* const inds_ptrs, const uint32_t query_ix, const uint32_t cluster_ix, const uint32_t sample_ix, uint32_t* bitset_ptr, int64_t bitset_len, int64_t original_nbits); +// Instantiate the device function template +template __device__ bool sample_filter(int64_t* const* const, const uint32_t, const uint32_t, const uint32_t, uint32_t*, int64_t, int64_t); }} // namespace cuvs::neighbors::ivf_flat::detail #else -#include "{filter_name}.h" #include -#include +#include "{filter_name}.h" __attribute__((__constructor__)) static void register_{filter_name}() {{ -using namespace cuvs::neighbors::ivf_flat::detail; -registerAlgorithm("{filter_name}", - embedded_{filter_name}, - sizeof(embedded_{filter_name})); + registerAlgorithm( + "{filter_name}", + embedded_{filter_name}, + sizeof(embedded_{filter_name})); }} #endif @@ -438,105 +276,14 @@ def generate_filter_device_function_content(filter_name): return content -def generate_metric_device_functions(script_dir, output_base_dir): - """Generate all metric device function files.""" - # Define all combinations we need - # Based on the kernel signatures, we have: - # - Veclen: 1, 2, 4, 8, 16 - # - Data types: float, __half, int8_t, uint8_t - # - Acc types: float (for float), __half (for __half), int32_t (for int8_t), uint32_t (for uint8_t) - # - 
Metrics: euclidean, inner_prod - - type_combinations = [ - ('float', 'float'), - ('__half', '__half'), - ('int8_t', 'int32_t'), - ('uint8_t', 'uint32_t'), - ] - - veclens = [1, 2, 4, 8, 16] - metrics = ['euclidean', 'inner_prod'] - - output_dir = output_base_dir / 'metric_device_functions' - output_dir.mkdir(parents=True, exist_ok=True) - - generated_files = [] - - type_abbrev = { - 'float': 'f', - '__half': 'h', - 'int8_t': 'i8', - 'uint8_t': 'u8', - 'int32_t': 'i32', - 'uint32_t': 'u32', - } - - for metric in metrics: - for veclen in veclens: - for data_type, acc_type in type_combinations: - filename = f"{metric}_{veclen}_{type_abbrev[data_type]}_{type_abbrev[acc_type]}.cu" - file_content = generate_metric_device_function_content(metric, veclen, data_type, acc_type) - - # Write file only if it doesn't exist or content has changed - output_file = output_dir / filename - should_write = True - if output_file.exists(): - with open(output_file, 'r') as f: - existing_content = f.read() - should_write = (existing_content != file_content) - - if should_write: - with open(output_file, 'w') as f: - f.write(file_content) - - generated_files.append(filename) - - print(f"Generated {len(generated_files)} metric device function files") - return generated_files - - -def generate_filter_device_functions(script_dir, output_base_dir): - """Generate all filter device function files.""" - filters = ['filter_none', 'filter_bitset'] - - output_dir = output_base_dir / 'filter_device_functions' - output_dir.mkdir(parents=True, exist_ok=True) - - generated_files = [] - - for filter_name in filters: - filename = f"{filter_name}.cu" - file_content = generate_filter_device_function_content(filter_name) - - # Write file only if it doesn't exist or content has changed - output_file = output_dir / filename - should_write = True - if output_file.exists(): - with open(output_file, 'r') as f: - existing_content = f.read() - should_write = (existing_content != file_content) - - if should_write: - 
with open(output_file, 'w') as f: - f.write(file_content) - - generated_files.append(filename) - - print(f"Generated {len(generated_files)} filter device function files") - return generated_files - - def generate_post_lambda_device_function_content(post_lambda_name): """Generate content for a post lambda device function file.""" - # Determine which header to include based on post lambda name if post_lambda_name == 'post_identity': header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh' elif post_lambda_name == 'post_sqrt': header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh' - elif post_lambda_name == 'post_compose': + else: # post_compose header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh' - else: - raise ValueError(f"Unknown post lambda: {post_lambda_name}") content = f"""/* * Copyright (c) 2025, NVIDIA CORPORATION. @@ -554,28 +301,30 @@ def generate_post_lambda_device_function_content(post_lambda_name): * limitations under the License. */ +// This file is auto-generated. Do not edit manually. 
+ #ifdef BUILD_KERNEL -#include "{header_file}" +#include <{header_file}> namespace cuvs::neighbors::ivf_flat::detail {{ -template __device__ float post_process(float val); +// Instantiate the device function template +template __device__ float post_process(float); }} // namespace cuvs::neighbors::ivf_flat::detail #else -#include "{post_lambda_name}.h" #include -#include +#include "{post_lambda_name}.h" __attribute__((__constructor__)) static void register_{post_lambda_name}() {{ -using namespace cuvs::neighbors::ivf_flat::detail; -registerAlgorithm("{post_lambda_name}", - embedded_{post_lambda_name}, - sizeof(embedded_{post_lambda_name})); + registerAlgorithm( + "{post_lambda_name}", + embedded_{post_lambda_name}, + sizeof(embedded_{post_lambda_name})); }} #endif @@ -583,177 +332,138 @@ def generate_post_lambda_device_function_content(post_lambda_name): return content -def generate_post_lambda_device_functions(script_dir, output_base_dir): - """Generate all post lambda device function files.""" - post_lambdas = ['post_identity', 'post_sqrt', 'post_compose'] +def generate_metric_device_functions(output_base_dir): + """Generate all metric device function files.""" + metric_dir = output_base_dir / 'metric_device_functions' + metric_dir.mkdir(parents=True, exist_ok=True) - output_dir = output_base_dir / 'post_lambda_device_functions' - output_dir.mkdir(parents=True, exist_ok=True) + metric_files = [] - generated_files = [] + for metric_name in METRIC_CONFIGS: + for data_type, acc_type, veclens, type_abbrev, acc_abbrev in DATA_TYPE_CONFIGS: + for veclen in veclens: + filename = f"metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}.cu" + filepath = metric_dir / filename - for post_lambda_name in post_lambdas: - filename = f"{post_lambda_name}.cu" - file_content = generate_post_lambda_device_function_content(post_lambda_name) + content = generate_metric_device_function_content(metric_name, veclen, data_type, acc_type) - # Write file only if it doesn't exist or 
content has changed - output_file = output_dir / filename - should_write = True - if output_file.exists(): - with open(output_file, 'r') as f: - existing_content = f.read() - should_write = (existing_content != file_content) + # Only write if content has changed + if not filepath.exists() or filepath.read_text() != content: + filepath.write_text(content) - if should_write: - with open(output_file, 'w') as f: - f.write(file_content) + metric_files.append(filename) - generated_files.append(filename) + return metric_files - print(f"Generated {len(generated_files)} post lambda device function files") - return generated_files +def generate_filter_device_functions(output_base_dir): + """Generate all filter device function files.""" + filter_dir = output_base_dir / 'filter_device_functions' + filter_dir.mkdir(parents=True, exist_ok=True) -def main(): - import sys + filter_files = [] - # Get the script directory to find the kernels file - script_dir = Path(__file__).parent.absolute() + for filter_name in FILTER_CONFIGS: + filename = f"{filter_name}.cu" + filepath = filter_dir / filename - # Read the kernels file (in the same directory as this script) - kernels_file = script_dir / 'interleaved_scan_kernels.txt' - if not kernels_file.exists(): - print(f"Error: {kernels_file} not found!") - return + content = generate_filter_device_function_content(filter_name) - with open(kernels_file, 'r') as f: - lines = f.readlines() + # Only write if content has changed + if not filepath.exists() or filepath.read_text() != content: + filepath.write_text(content) - # Output directory - use command line argument if provided, otherwise use source dir - if len(sys.argv) > 1: - output_base_dir = Path(sys.argv[1]).absolute() - else: - output_base_dir = script_dir + filter_files.append(filename) - output_dir = output_base_dir / 'interleaved_scan_kernels' - output_dir.mkdir(parents=True, exist_ok=True) + return filter_files - # Parse all kernels and generate files - # Use a dict to deduplicate 
by filename (since we exclude metric from filename) - unique_kernels = {} - for line_num, line in enumerate(lines, 1): - line = line.strip() - if not line: - continue +def generate_post_lambda_device_functions(output_base_dir): + """Generate all post lambda device function files.""" + post_lambda_dir = output_base_dir / 'post_lambda_device_functions' + post_lambda_dir.mkdir(parents=True, exist_ok=True) - # Extract the full template from the function signature - start = line.find('interleaved_scan_kernel<') - if start == -1: - continue + post_lambda_files = [] - start += len('interleaved_scan_kernel<') - depth = 1 - end = start + for post_lambda_name in POST_LAMBDA_CONFIGS: + filename = f"{post_lambda_name}.cu" + filepath = post_lambda_dir / filename - while depth > 0 and end < len(line): - if line[end] == '<': - depth += 1 - elif line[end] == '>': - depth -= 1 - end += 1 + content = generate_post_lambda_device_function_content(post_lambda_name) - template_str = line[start:end-1] - params = parse_template_parameters(template_str) + # Only write if content has changed + if not filepath.exists() or filepath.read_text() != content: + filepath.write_text(content) - if len(params) != 10: - print(f"Warning: Line {line_num} has {len(params)} parameters, expected 10") - continue + post_lambda_files.append(filename) - # Generate filename and content - filename = generate_filename(params) + return post_lambda_files + + +def main(): + import sys + + # Get the script directory + script_dir = Path(__file__).parent.absolute() - # Only generate if we haven't seen this filename yet (deduplication) - if filename not in unique_kernels: - file_content = generate_cuda_file_content(params) - unique_kernels[filename] = file_content + # Output directory - use CMAKE_CURRENT_BINARY_DIR if provided, otherwise use source dir + output_base_dir = Path(sys.argv[1]).absolute() if len(sys.argv) > 1 else script_dir + output_dir = output_base_dir / 'interleaved_scan_kernels' + 
output_dir.mkdir(parents=True, exist_ok=True) - # Write all unique kernel files + kernels = generate_kernel_combinations() + + # Generate kernel files generated_files = [] - for filename, file_content in unique_kernels.items(): - output_file = output_dir / filename - should_write = True - if output_file.exists(): - with open(output_file, 'r') as f: - existing_content = f.read() - should_write = (existing_content != file_content) - - if should_write: - with open(output_file, 'w') as f: - f.write(file_content) + for params in kernels: + filename = generate_filename(params) + filepath = output_dir / filename - generated_files.append(filename) + content = generate_cuda_file_content(params) - if len(generated_files) % 100 == 0: - print(f"Generated {len(generated_files)} files...") + # Only write if content has changed + if not filepath.exists() or filepath.read_text() != content: + filepath.write_text(content) - print(f"\nGenerated {len(generated_files)} CUDA kernel files") + generated_files.append(filename) # Generate metric device function files - metric_files = generate_metric_device_functions(script_dir, output_base_dir) + metric_files = generate_metric_device_functions(output_base_dir) # Generate filter device function files - filter_files = generate_filter_device_functions(script_dir, output_base_dir) + filter_files = generate_filter_device_functions(output_base_dir) # Generate post lambda device function files - post_lambda_files = generate_post_lambda_device_functions(script_dir, output_base_dir) - - # Generate CMake file with all filenames - # CMake file goes to the binary directory - cmake_dir = output_base_dir - cmake_dir.mkdir(parents=True, exist_ok=True) - cmake_file = cmake_dir / 'interleaved_scan.cmake' - - # Generate CMake content - # Paths are now relative to CMAKE_CURRENT_BINARY_DIR - cmake_content = "# Auto-generated list of interleaved scan kernel files\n" - cmake_content += "# Generated by generate_kernels.py\n\n" + post_lambda_files = 
generate_post_lambda_device_functions(output_base_dir) + # Generate CMake file listing all generated files + cmake_file = output_base_dir / 'interleaved_scan.cmake' + + cmake_content = "# Auto-generated file listing all kernel and device function files\n\n" + cmake_content += "set(INTERLEAVED_SCAN_KERNEL_FILES\n" for filename in sorted(generated_files): cmake_content += f" generated_kernels/interleaved_scan_kernels/{filename}\n" cmake_content += ")\n\n" - # Add metric device function files cmake_content += "set(METRIC_DEVICE_FUNCTION_FILES\n" for filename in sorted(metric_files): cmake_content += f" generated_kernels/metric_device_functions/{filename}\n" cmake_content += ")\n\n" - # Add filter device function files cmake_content += "set(FILTER_DEVICE_FUNCTION_FILES\n" for filename in sorted(filter_files): cmake_content += f" generated_kernels/filter_device_functions/{filename}\n" cmake_content += ")\n\n" - # Add post lambda device function files cmake_content += "set(POST_LAMBDA_DEVICE_FUNCTION_FILES\n" for filename in sorted(post_lambda_files): cmake_content += f" generated_kernels/post_lambda_device_functions/{filename}\n" cmake_content += ")\n" # Only write if content has changed - should_write_cmake = True - if cmake_file.exists(): - with open(cmake_file, 'r') as f: - existing_cmake = f.read() - should_write_cmake = (existing_cmake != cmake_content) - - if should_write_cmake: - with open(cmake_file, 'w') as f: - f.write(cmake_content) - print(f"Updated CMake file: {cmake_file}") - else: - print(f"CMake file unchanged: {cmake_file}") + if not cmake_file.exists() or cmake_file.read_text() != cmake_content: + cmake_file.write_text(cmake_content) + if __name__ == '__main__': main() diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt deleted file mode 100644 index e0997394b1..0000000000 --- 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt +++ /dev/null @@ -1,1280 +0,0 @@ -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float 
const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned 
int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, 
float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned 
int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, 
unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned 
int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half 
const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, 
float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned 
int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned 
int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, 
__half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned 
int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, 
float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned 
int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, 
long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float 
const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, 
unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, true, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned 
int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, 
unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) 
-void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, 
false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, 
raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int 
const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, float, float, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, 
raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, 
unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* 
const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, 
false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned 
int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed 
char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned 
int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, 
signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, 
raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, 
unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned 
int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float 
const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, 
signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned 
char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned 
int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half 
const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, 
unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 
16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter 
>, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* 
const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned 
int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, 
unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, 
float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned 
int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, 
unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, 
unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned 
int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, 
unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, true, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, 
int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, 
false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, 
unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned 
char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, 
float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned 
int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) 
-void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned 
int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, 
unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, 
false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* 
const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, 
unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<1, 1, false, true, unsigned char, unsigned 
int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, unsigned char, unsigned int, 
long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, true, signed char, int, 
long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, 
unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, true, 
true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
__half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, 
raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) 
-void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, 
unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<2, 16, 
true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, 
float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, 
unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, 
raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, 
signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int 
const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned 
int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, 
__half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, true, signed char, int, 
long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half 
const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, 
signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, 
unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned 
char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, 
unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<4, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, signed char, int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed 
char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, 
unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, 
unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<0, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, 
__half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, 
raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, 
unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned 
int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, 
unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, 
unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) 
-void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) 
-void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<8, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, 
float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* 
const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, 
unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, 
float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, 
unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* 
const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned 
int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, 
float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned 
int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned 
int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 8, false, true, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned 
int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, 
unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, 
float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<16, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned 
int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, 
unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, 
unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, 
true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, 
raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* 
const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, 
signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, 
unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, 
unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, 
unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, 
__half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, 
unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned 
int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, 
signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) 
-void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, 
raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > 
>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, 
false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, 
raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int 
const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned 
int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned 
int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<32, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned 
int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned 
int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, float, float, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, 
__half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned 
int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned 
int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, 
float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, 
raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, 
signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, 
unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, 
int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, false, false, 
signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<64, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned 
int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, 
float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned 
int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, 
unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 
4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, 
raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, 
long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, 
unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, 
__half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, __half, __half, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, 
unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, 
signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, 
unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, 
false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, 
float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, 
unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, 
signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int 
const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, 
unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, 
unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op 
> >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int 
const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned 
int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, 
float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, false, 
false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<128, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned 
int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, 
__half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, 
unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, 
cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, 
unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned 
int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, 
float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, 
raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, false, float, float, 
long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::identity_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, 
unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned 
int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, 
cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 
4, true, false, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<4, float, float>, raft::sqrt_op, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::identity_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, 
raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, 
unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, 
raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, 
unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned 
int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int 
const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, false, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<8, __half, __half>, raft::sqrt_op, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, true, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, 
cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 8, false, true, __half, __half, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<8, __half, __half>, raft::compose_op, raft::plug_const_op >, unsigned int, __half const*, unsigned int const*, __half const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, true, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, 
cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 4, false, true, float, float, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<4, float, float>, raft::compose_op, raft::plug_const_op >, unsigned int, float const*, unsigned int const*, float const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, 
raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned 
char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int 
const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, 
unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<16, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::identity_op, unsigned int, unsigned char const*, unsigned int const*, unsigned 
char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned 
int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, signed char, int>, raft::compose_op, raft::plug_const_op >, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, true, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op > >(cuvs::neighbors::ivf_flat::detail::inner_prod_dist<1, unsigned char, unsigned int>, raft::compose_op, raft::plug_const_op >, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::identity_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned 
int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 1, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<1, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, unsigned char, unsigned int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, unsigned char, unsigned int>, raft::sqrt_op, unsigned int, unsigned char const*, unsigned int const*, unsigned char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void 
cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, false, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) -void cuvs::neighbors::ivf_flat::detail::interleaved_scan_kernel<256, 16, true, false, signed char, int, long, cuvs::neighbors::filtering::ivf_to_sample_filter >, cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op>(cuvs::neighbors::ivf_flat::detail::euclidean_dist<16, signed char, int>, raft::sqrt_op, unsigned int, signed char const*, unsigned int const*, signed char const* const*, unsigned int const*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int const*, unsigned int, cuvs::neighbors::filtering::ivf_to_sample_filter >, unsigned int*, float*) diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp index 6fc47a1deb..2fc670a8e9 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp @@ -19,19 +19,19 @@ namespace cuvs::neighbors::ivf_flat::detail { // Tag types for data types -struct tag_float {}; -struct tag_half {}; -struct tag_int8 {}; -struct tag_uint8 {}; +struct tag_f {}; +struct tag_h {}; +struct tag_sc {}; +struct tag_uc {}; // Tag types for accumulator types -struct tag_acc_float {}; -struct tag_acc_half {}; -struct tag_acc_int32 {}; -struct tag_acc_uint32 {}; +struct tag_acc_f {}; +struct tag_acc_h {}; +struct 
tag_acc_i {}; +struct tag_acc_ui {}; // Tag types for index types -struct tag_idx_int64 {}; +struct tag_idx_l {}; // Tag types for filter subtypes struct tag_filter_bitset_impl {}; diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh index 1264f27e5a..8d6327ce1e 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh @@ -16,7 +16,6 @@ #pragma once -#include "interleaved_scan_tags.hpp" #include namespace cuvs::neighbors::ivf_flat::detail { diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh index a69ab2f71f..09b701e7c4 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh @@ -16,7 +16,6 @@ #pragma once -#include "interleaved_scan_tags.hpp" #include namespace cuvs::neighbors::ivf_flat::detail { From 37f1163d8839f825db467aa80e9ff7b78f34205c Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Oct 2025 20:35:13 +0000 Subject: [PATCH 011/158] random cmake changes carried over from 25.10 --- cpp/cmake/modules/ConfigureCUDA.cmake | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 5ce9b918b4..d8576cd650 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -19,6 +19,11 @@ if(DISABLE_DEPRECATION_WARNINGS) ) endif() +if(DISABLE_OPENMP) + list(APPEND CUVS_CXX_FLAGS -Wno-unknown-pragmas) + list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wno-unknown-pragmas) +endif() + # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with # clang) if(CMAKE_COMPILER_IS_GNUCXX) @@ -64,6 +69,6 @@ endif() # Debug 
options if(CMAKE_BUILD_TYPE MATCHES Debug) message(VERBOSE "cuVS: Building with debugging flags") - list(APPEND CUVS_CUDA_FLAGS -G -Xcompiler=-rdynamic) + list(APPEND CUVS_CUDA_FLAGS -G -Xcompiler=-rdynamic --maxrregcount=64) list(APPEND CUVS_CUDA_FLAGS -Xptxas --suppress-stack-size-warning) endif() From 0ae5383b43e7bfd875d2b477ad22d469cc97bea9 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Oct 2025 20:49:21 +0000 Subject: [PATCH 012/158] cmake format --- cpp/CMakeLists.txt | 66 +++++++++---------- cpp/cmake/modules/embed_fatbins.cmake | 37 +++++------ cpp/cmake/modules/generate_header.cmake | 13 ++-- .../generate_interleaved_scan_kernels.cmake | 50 ++++++++++---- 4 files changed, 91 insertions(+), 75 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d6554fc162..221f97bcd8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -334,10 +334,8 @@ if(NOT BUILD_CPU_ONLY) ) endif() - set(JIT_LTO_FILES - src/detail/jit_lto/AlgorithmLauncher.cu - src/detail/jit_lto/FragmentDatabase.cu - src/detail/jit_lto/FragmentEntry.cu + set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/FragmentDatabase.cu + src/detail/jit_lto/FragmentEntry.cu ) add_library( @@ -570,58 +568,55 @@ if(NOT BUILD_CPU_ONLY) ) add_library( - jit_lto_fatbins OBJECT - ${INTERLEAVED_SCAN_KERNEL_FILES} - ${METRIC_DEVICE_FUNCTION_FILES} - ${FILTER_DEVICE_FUNCTION_FILES} - ${POST_LAMBDA_DEVICE_FUNCTION_FILES} + jit_lto_fatbins OBJECT ${INTERLEAVED_SCAN_KERNEL_FILES} ${METRIC_DEVICE_FUNCTION_FILES} + ${FILTER_DEVICE_FUNCTION_FILES} ${POST_LAMBDA_DEVICE_FUNCTION_FILES} ) # Make sure the kernels are generated before we try to build them add_dependencies(jit_lto_fatbins ${INTERLEAVED_SCAN_KERNELS_TARGET}) target_compile_definitions(jit_lto_fatbins PRIVATE BUILD_KERNEL) - target_include_directories(jit_lto_fatbins PRIVATE - "$" - "$" + target_include_directories( + jit_lto_fatbins PRIVATE "$" + "$" ) target_compile_options(jit_lto_fatbins PRIVATE 
-Xfatbin=--compress-all --compress-mode=size) target_compile_options( jit_lto_fatbins PRIVATE "$<$:${CUVS_CXX_FLAGS}>" "$<$:${CUVS_CUDA_FLAGS}>" ) - set_target_properties(jit_lto_fatbins PROPERTIES - CUDA_ARCHITECTURES "75-real" - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION ON - CUDA_FATBIN_COMPILATION ON - POSITION_INDEPENDENT_CODE ON - INTERPROCEDURAL_OPTIMIZATION ON) + set_target_properties( + jit_lto_fatbins + PROPERTIES CUDA_ARCHITECTURES "75-real" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + CUDA_SEPARABLE_COMPILATION ON + CUDA_FATBIN_COMPILATION ON + POSITION_INDEPENDENT_CODE ON + INTERPROCEDURAL_OPTIMIZATION ON + ) target_link_libraries(jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) - add_library(jit_lto_fatbins_as_cpp_sources STATIC - src/detail/jit_lto/AlgorithmPlanner.cu - ) + add_library(jit_lto_fatbins_as_cpp_sources STATIC src/detail/jit_lto/AlgorithmPlanner.cu) # Set PIC for the static library since it will be linked into a shared library - set_target_properties(jit_lto_fatbins_as_cpp_sources PROPERTIES - CUDA_ARCHITECTURES "75-real" - POSITION_INDEPENDENT_CODE ON + set_target_properties( + jit_lto_fatbins_as_cpp_sources PROPERTIES CUDA_ARCHITECTURES "75-real" POSITION_INDEPENDENT_CODE + ON ) # Make sure the kernels are generated before embedding fatbins add_dependencies(jit_lto_fatbins_as_cpp_sources ${INTERLEAVED_SCAN_KERNELS_TARGET}) - target_include_directories(jit_lto_fatbins_as_cpp_sources PRIVATE - "$" - "$" + target_include_directories( + jit_lto_fatbins_as_cpp_sources PRIVATE "$" + "$" ) - # target_compile_options( - # jit_lto_fatbins_as_cpp_sources PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - # "$<$:${CUVS_CUDA_FLAGS}>" + # target_compile_options( jit_lto_fatbins_as_cpp_sources PRIVATE + # "$<$:${CUVS_CXX_FLAGS}>" + # "$<$:${CUVS_CUDA_FLAGS}>" # ) target_link_libraries(jit_lto_fatbins_as_cpp_sources PRIVATE 
CUDA::cuda_driver) @@ -697,7 +692,8 @@ if(NOT BUILD_CPU_ONLY) $> $> $<$:CUDA::nvtx3> - PRIVATE nvidia::cutlass::cutlass $ + PRIVATE nvidia::cutlass::cutlass + $ cuvs-cagra-search $ CUDA::nvJitLink diff --git a/cpp/cmake/modules/embed_fatbins.cmake b/cpp/cmake/modules/embed_fatbins.cmake index d7573db36b..d16421b626 100644 --- a/cpp/cmake/modules/embed_fatbins.cmake +++ b/cpp/cmake/modules/embed_fatbins.cmake @@ -12,49 +12,46 @@ # the License. # ============================================================================= - function(embed_fatbins library_name kernel_target) find_package(CUDAToolkit REQUIRED) - find_program(bin_to_c + find_program( + bin_to_c NAMES bin2c PATHS ${CUDAToolkit_BIN_DIR} - ) + ) set(output_dir ${CMAKE_CURRENT_BINARY_DIR}/${library_name}) # Create a response file to avoid "argument list too long" errors set(objects_response_file ${CMAKE_CURRENT_BINARY_DIR}/${library_name}_objects.rsp) - # Write the objects list to a response file using file(GENERATE) which handles generator expressions - file(GENERATE + # Write the objects list to a response file using file(GENERATE) which handles generator + # expressions + file( + GENERATE OUTPUT "${objects_response_file}" CONTENT "$,\n>\n" - ) + ) # Generate individual headers for each FATBIN object add_custom_command( OUTPUT "${output_dir}/headers_generated.stamp" - COMMAND ${CMAKE_COMMAND} - "-DBIN_TO_C_COMMAND=${bin_to_c}" - "-DOBJECTS_RESPONSE_FILE=${objects_response_file}" - "-DOUTPUT_DIR=${output_dir}" - "-DSTAMP_FILE=${output_dir}/headers_generated.stamp" - -P ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/generate_header.cmake + COMMAND + ${CMAKE_COMMAND} "-DBIN_TO_C_COMMAND=${bin_to_c}" + "-DOBJECTS_RESPONSE_FILE=${objects_response_file}" "-DOUTPUT_DIR=${output_dir}" + "-DSTAMP_FILE=${output_dir}/headers_generated.stamp" -P + ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/generate_header.cmake VERBATIM DEPENDS "${objects_response_file}" $ COMMENT "Converting FATBIN kernels to individual C++ headers" - ) + ) - # get 
the sources of `kernel_target` and add them as CUDA - # sources so we re-compile them to get the inline registration logic + # get the sources of `kernel_target` and add them as CUDA sources so we re-compile them to get the + # inline registration logic get_target_property(output_sources ${kernel_target} SOURCES) # add those c++ sources to `library_name` - target_sources(${library_name} - PRIVATE - "${output_dir}/headers_generated.stamp" - ${output_sources} - ) + target_sources(${library_name} PRIVATE "${output_dir}/headers_generated.stamp" ${output_sources}) target_compile_features(${library_name} PRIVATE cxx_std_20) target_include_directories(${library_name} PRIVATE ${output_dir}) endfunction() diff --git a/cpp/cmake/modules/generate_header.cmake b/cpp/cmake/modules/generate_header.cmake index a095d76775..83c48cb086 100644 --- a/cpp/cmake/modules/generate_header.cmake +++ b/cpp/cmake/modules/generate_header.cmake @@ -39,12 +39,13 @@ foreach(obj ${objects_list}) set(header_file "${OUTPUT_DIR}/${obj_name}.h") set(args -c -p 0x0 --name embedded_${obj_name} ${obj}) - execute_process(COMMAND "${BIN_TO_C_COMMAND}" ${args} - WORKING_DIRECTORY ${obj_dir} - RESULT_VARIABLE result - OUTPUT_VARIABLE output - ERROR_VARIABLE error_var - ) + execute_process( + COMMAND "${BIN_TO_C_COMMAND}" ${args} + WORKING_DIRECTORY ${obj_dir} + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error_var + ) if(NOT result EQUAL 0) message(FATAL_ERROR "Failed to process ${obj}: ${error_var}") endif() diff --git a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake index 0a7049afe3..ce18ff81ad 100644 --- a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake +++ b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake @@ -16,8 +16,12 @@ function(generate_interleaved_scan_kernels) find_package(Python3 REQUIRED COMPONENTS Interpreter) - set(KERNEL_LIST_FILE 
${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt) - set(GENERATOR_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py) + set(KERNEL_LIST_FILE + ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt + ) + set(GENERATOR_SCRIPT + ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py + ) set(OUTPUT_BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated_kernels) set(CMAKE_LIST_FILE ${OUTPUT_BASE_DIR}/interleaved_scan.cmake) set(STAMP_FILE ${CMAKE_CURRENT_BINARY_DIR}/kernels_generated.stamp) @@ -33,12 +37,9 @@ function(generate_interleaved_scan_kernels) ) # Create a custom target that depends on the stamp file - add_custom_target(generate_interleaved_scan_kernels_target - DEPENDS ${STAMP_FILE} - ) + add_custom_target(generate_interleaved_scan_kernels_target DEPENDS ${STAMP_FILE}) - # Include the generated CMake list file - # Only generate if the CMake list file doesn't exist + # Include the generated CMake list file Only generate if the CMake list file doesn't exist if(NOT EXISTS ${CMAKE_LIST_FILE}) message(VERBOSE "Generating interleaved scan kernels for the first time...") execute_process( @@ -49,7 +50,10 @@ function(generate_interleaved_scan_kernels) ) if(NOT GENERATION_RESULT EQUAL 0) - message(FATAL_ERROR "Failed to generate kernel files during configuration\nOutput: ${GENERATION_OUTPUT}\nError: ${GENERATION_ERROR}") + message( + FATAL_ERROR + "Failed to generate kernel files during configuration\nOutput: ${GENERATION_OUTPUT}\nError: ${GENERATION_ERROR}" + ) endif() endif() @@ -81,10 +85,28 @@ function(generate_interleaved_scan_kernels) endforeach() # Return the lists to parent scope - set(INTERLEAVED_SCAN_KERNEL_FILES ${FULL_PATH_KERNEL_FILES} PARENT_SCOPE) - set(METRIC_DEVICE_FUNCTION_FILES ${FULL_PATH_METRIC_FILES} PARENT_SCOPE) - set(FILTER_DEVICE_FUNCTION_FILES ${FULL_PATH_FILTER_FILES} PARENT_SCOPE) - 
set(POST_LAMBDA_DEVICE_FUNCTION_FILES ${FULL_PATH_POST_LAMBDA_FILES} PARENT_SCOPE) - set(INTERLEAVED_SCAN_KERNELS_STAMP ${STAMP_FILE} PARENT_SCOPE) - set(INTERLEAVED_SCAN_KERNELS_TARGET generate_interleaved_scan_kernels_target PARENT_SCOPE) + set(INTERLEAVED_SCAN_KERNEL_FILES + ${FULL_PATH_KERNEL_FILES} + PARENT_SCOPE + ) + set(METRIC_DEVICE_FUNCTION_FILES + ${FULL_PATH_METRIC_FILES} + PARENT_SCOPE + ) + set(FILTER_DEVICE_FUNCTION_FILES + ${FULL_PATH_FILTER_FILES} + PARENT_SCOPE + ) + set(POST_LAMBDA_DEVICE_FUNCTION_FILES + ${FULL_PATH_POST_LAMBDA_FILES} + PARENT_SCOPE + ) + set(INTERLEAVED_SCAN_KERNELS_STAMP + ${STAMP_FILE} + PARENT_SCOPE + ) + set(INTERLEAVED_SCAN_KERNELS_TARGET + generate_interleaved_scan_kernels_target + PARENT_SCOPE + ) endfunction() From fe56aec1d3858c24cd94fcdf6cc2bae2099df0b1 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Oct 2025 20:51:14 +0000 Subject: [PATCH 013/158] remove dep on kernel list --- cpp/cmake/modules/generate_interleaved_scan_kernels.cmake | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake index ce18ff81ad..9e15cff5ab 100644 --- a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake +++ b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake @@ -16,9 +16,6 @@ function(generate_interleaved_scan_kernels) find_package(Python3 REQUIRED COMPONENTS Interpreter) - set(KERNEL_LIST_FILE - ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernels.txt - ) set(GENERATOR_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py ) @@ -31,7 +28,7 @@ function(generate_interleaved_scan_kernels) OUTPUT ${STAMP_FILE} COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} ${OUTPUT_BASE_DIR} COMMAND ${CMAKE_COMMAND} -E touch ${STAMP_FILE} - DEPENDS ${KERNEL_LIST_FILE} ${GENERATOR_SCRIPT} + DEPENDS ${GENERATOR_SCRIPT} COMMENT 
"Generating interleaved scan kernel files..." VERBATIM ) From 40c8fd64164b6dfba73df8fe4ae03806812dbc20 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Oct 2025 02:54:45 +0000 Subject: [PATCH 014/158] attempt to solve overlinking problem --- ci/build_wheel.sh | 1 + .../all_cuda-129_arch-aarch64.yaml | 1 + .../all_cuda-129_arch-x86_64.yaml | 1 + .../all_cuda-130_arch-aarch64.yaml | 1 + .../all_cuda-130_arch-x86_64.yaml | 1 + .../bench_ann_cuda-129_arch-aarch64.yaml | 2 ++ .../bench_ann_cuda-129_arch-x86_64.yaml | 2 ++ .../bench_ann_cuda-130_arch-aarch64.yaml | 2 ++ .../bench_ann_cuda-130_arch-x86_64.yaml | 2 ++ .../go_cuda-129_arch-aarch64.yaml | 2 ++ .../environments/go_cuda-129_arch-x86_64.yaml | 2 ++ .../go_cuda-130_arch-aarch64.yaml | 2 ++ .../environments/go_cuda-130_arch-x86_64.yaml | 2 ++ .../rust_cuda-129_arch-aarch64.yaml | 2 ++ .../rust_cuda-129_arch-x86_64.yaml | 2 ++ .../rust_cuda-130_arch-aarch64.yaml | 2 ++ .../rust_cuda-130_arch-x86_64.yaml | 2 ++ conda/recipes/libcuvs/recipe.yaml | 9 ++++--- cpp/CMakeLists.txt | 24 ++++++++++++----- dependencies.yaml | 27 ++++--------------- python/libcuvs/pyproject.toml | 3 +-- 21 files changed, 57 insertions(+), 35 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 48bea18916..305af498ec 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -17,6 +17,7 @@ cd "${package_dir}" EXCLUDE_ARGS=( --exclude "libcublas.so.*" --exclude "libcublasLt.so.*" + --exclude "libcuda.so.*" --exclude "libcurand.so.*" --exclude "libcusolver.so.*" --exclude "libcusparse.so.*" diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index c7d0d8b659..5b2e3bd896 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -12,6 +12,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git 
a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 0969780718..979cde0845 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -12,6 +12,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 04ccd3496b..985cb902d3 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -12,6 +12,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 1c5d09dda0..63102ecb38 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -12,6 +12,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml index 5fe174ab6a..dc7b4a0a56 100644 --- a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml @@ -12,6 +12,7 @@ dependencies: - click - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -31,6 +32,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - nccl>=2.19 diff --git a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml index 098e1765fa..ec98a44b9f 100644 --- 
a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml @@ -12,6 +12,7 @@ dependencies: - click - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -33,6 +34,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - mkl-devel=2023 diff --git a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml index 3a54fecedb..9d445301dc 100644 --- a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml @@ -12,6 +12,7 @@ dependencies: - click - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -31,6 +32,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - nccl>=2.19 diff --git a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml index 95aa4d7a13..14acd0351e 100644 --- a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml @@ -12,6 +12,7 @@ dependencies: - click - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -33,6 +34,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - mkl-devel=2023 diff --git a/conda/environments/go_cuda-129_arch-aarch64.yaml b/conda/environments/go_cuda-129_arch-aarch64.yaml index b8bf557877..9c9f491ddf 100644 --- a/conda/environments/go_cuda-129_arch-aarch64.yaml +++ b/conda/environments/go_cuda-129_arch-aarch64.yaml @@ -11,6 +11,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - 
cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -25,6 +26,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git a/conda/environments/go_cuda-129_arch-x86_64.yaml b/conda/environments/go_cuda-129_arch-x86_64.yaml index adc12d644b..b191ea9a9b 100644 --- a/conda/environments/go_cuda-129_arch-x86_64.yaml +++ b/conda/environments/go_cuda-129_arch-x86_64.yaml @@ -11,6 +11,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -25,6 +26,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git a/conda/environments/go_cuda-130_arch-aarch64.yaml b/conda/environments/go_cuda-130_arch-aarch64.yaml index ca450a317c..1583536989 100644 --- a/conda/environments/go_cuda-130_arch-aarch64.yaml +++ b/conda/environments/go_cuda-130_arch-aarch64.yaml @@ -11,6 +11,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -25,6 +26,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git a/conda/environments/go_cuda-130_arch-x86_64.yaml b/conda/environments/go_cuda-130_arch-x86_64.yaml index 5873836633..2ca05f88a5 100644 --- a/conda/environments/go_cuda-130_arch-x86_64.yaml +++ b/conda/environments/go_cuda-130_arch-x86_64.yaml @@ -11,6 +11,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -25,6 +26,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git 
a/conda/environments/rust_cuda-129_arch-aarch64.yaml b/conda/environments/rust_cuda-129_arch-aarch64.yaml index 28d7701d68..8ce13c4204 100644 --- a/conda/environments/rust_cuda-129_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-129_arch-aarch64.yaml @@ -10,6 +10,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -22,6 +23,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/environments/rust_cuda-129_arch-x86_64.yaml b/conda/environments/rust_cuda-129_arch-x86_64.yaml index a21932185b..2dad738655 100644 --- a/conda/environments/rust_cuda-129_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-129_arch-x86_64.yaml @@ -10,6 +10,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -22,6 +23,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/environments/rust_cuda-130_arch-aarch64.yaml b/conda/environments/rust_cuda-130_arch-aarch64.yaml index 7533f45e23..8a5ac763a8 100644 --- a/conda/environments/rust_cuda-130_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-130_arch-aarch64.yaml @@ -10,6 +10,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -22,6 +23,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/environments/rust_cuda-130_arch-x86_64.yaml b/conda/environments/rust_cuda-130_arch-x86_64.yaml index 0b4dbd7b09..bdb40cc9a8 100644 --- a/conda/environments/rust_cuda-130_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-130_arch-x86_64.yaml @@ 
-10,6 +10,7 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev +- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api @@ -22,6 +23,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 +- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/recipes/libcuvs/recipe.yaml b/conda/recipes/libcuvs/recipe.yaml index 340533f092..bd820be51a 100644 --- a/conda/recipes/libcuvs/recipe.yaml +++ b/conda/recipes/libcuvs/recipe.yaml @@ -97,20 +97,20 @@ outputs: - cmake ${{ cmake_version }} - ${{ stdlib("c") }} host: - - libnvjitlink-dev - librmm =${{ minor_version }} - libraft-headers =${{ minor_version }} - nccl ${{ nccl_version }} - cuda-version =${{ cuda_version }} - cuda-cudart-dev + - cuda-driver-dev - cuda-profiler-api - libcublas-dev - libcurand-dev - libcusolver-dev - libcusparse-dev + - libnvjitlink-dev run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - - libnvjitlink-dev - libraft-headers =${{ minor_version }} - nccl - cuda-cudart @@ -118,9 +118,11 @@ outputs: - libcurand - libcusolver - libcusparse + - libnvjitlink ignore_run_exports: by_name: - cuda-cudart + - cuda-driver-dev - cuda-version - libaio - libboost @@ -128,8 +130,7 @@ outputs: - libcurand - libcusolver - libcusparse - - libcuda.so.1 - - libnvjitlink-dev + - libnvjitlink - librmm - mkl - nccl diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 221f97bcd8..f69726a026 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -144,6 +144,7 @@ if(DETECT_CONDA_ENV) ) set(CMAKE_INSTALL_PREFIX "$ENV{CONDA_PREFIX}") endif() + set(CONDA_STUB_PATH "$ENV{CONDA_PREFIX}/lib/stubs") endif() # ################################################################################################## @@ -585,9 +586,17 @@ if(NOT BUILD_CPU_ONLY) jit_lto_fatbins PRIVATE "$<$:${CUVS_CXX_FLAGS}>" "$<$:${CUVS_CUDA_FLAGS}>" ) + + set(JIT_LTO_TARGET_ARCHITECTURE "") + if(CMAKE_CUDA_COMPILER_VERSION 
VERSION_GREATER_EQUAL 12.0) + set(JIT_LTO_TARGET_ARCHITECTURE "70-real") + elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + set(JIT_LTO_TARGET_ARCHITECTURE "75-real") + endif() + set_target_properties( jit_lto_fatbins - PROPERTIES CUDA_ARCHITECTURES "75-real" + PROPERTIES CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON CUDA_STANDARD 17 @@ -603,8 +612,8 @@ if(NOT BUILD_CPU_ONLY) # Set PIC for the static library since it will be linked into a shared library set_target_properties( - jit_lto_fatbins_as_cpp_sources PROPERTIES CUDA_ARCHITECTURES "75-real" POSITION_INDEPENDENT_CODE - ON + jit_lto_fatbins_as_cpp_sources PROPERTIES CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} + POSITION_INDEPENDENT_CODE ON ) # Make sure the kernels are generated before embedding fatbins @@ -614,10 +623,10 @@ if(NOT BUILD_CPU_ONLY) jit_lto_fatbins_as_cpp_sources PRIVATE "$" "$" ) - # target_compile_options( jit_lto_fatbins_as_cpp_sources PRIVATE - # "$<$:${CUVS_CXX_FLAGS}>" - # "$<$:${CUVS_CUDA_FLAGS}>" - # ) + + target_link_directories( + jit_lto_fatbins_as_cpp_sources PRIVATE $<$:${CONDA_STUB_PATH}> + ) target_link_libraries(jit_lto_fatbins_as_cpp_sources PRIVATE CUDA::cuda_driver) embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) @@ -757,6 +766,7 @@ SECTIONS $> $<$:CUDA::nvtx3> PRIVATE nvidia::cutlass::cutlass $ + $ CUDA::nvJitLink ) endif() diff --git a/dependencies.yaml b/dependencies.yaml index 2f0a882107..f30e13b09e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -16,7 +16,6 @@ files: - cuda_version - depends_on_cuda_python - depends_on_cupy - - depends_on_libnvjitlink_dev - depends_on_librmm - depends_on_pylibraft - depends_on_nccl @@ -138,7 +137,6 @@ files: table: tool.rapids-build-backend key: requires includes: - - depends_on_libnvjitlink_dev - depends_on_libraft - depends_on_librmm - depends_on_nccl @@ -150,7 +148,6 @@ files: table: project includes: - cuda_wheels - - depends_on_libnvjitlink_dev 
- depends_on_libraft - depends_on_librmm - depends_on_nccl @@ -319,11 +316,13 @@ dependencies: packages: - cuda-nvtx-dev - cuda-cudart-dev + - cuda-driver-dev - cuda-profiler-api - libcublas-dev - libcurand-dev - libcusolver-dev - libcusparse-dev + - libnvjitlink-dev cuda_wheels: specific: - output_types: [requirements, pyproject] @@ -333,11 +332,13 @@ dependencies: use_cuda_wheels: "true" packages: - cuda-toolkit[cublas,curand,cusolver,cusparse]==12.* + - nvidia-nvjitlink-cu12 - matrix: cuda: "13.*" use_cuda_wheels: "true" packages: - cuda-toolkit[cublas,curand,cusolver,cusparse]==13.* + - nvidia-nvjitlink==13.* - matrix: use_cuda_wheels: "false" packages: @@ -346,6 +347,7 @@ dependencies: - matrix: packages: - cuda-toolkit[cublas,curand,cusolver,cusparse]>=12,<14 + - nvidia-nvjitlink>=13,<14 depends_on_cupy: common: - output_types: conda @@ -540,25 +542,6 @@ dependencies: - output_types: conda packages: - libcuvs-tests==25.12.*,>=0.0.0a0 - depends_on_libnvjitlink_dev: - common: - - output_types: conda - packages: - - libnvjitlink-dev - specific: - - output_types: [requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - nvidia-nvjitlink-cu12 - - matrix: - cuda: "13.*" - cuda_suffixed: "false" - packages: - - nvidia-nvjitlink=13.* - - {matrix: null, packages: [nvidia-nvjitlink]} depends_on_libraft: common: - output_types: conda diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml index 6c30d91153..ef8e7f9c8a 100644 --- a/python/libcuvs/pyproject.toml +++ b/python/libcuvs/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "cuda-toolkit[cublas,curand,cusolver,cusparse]>=12,<14", "libraft==25.12.*,>=0.0.0a0", "librmm==25.12.*,>=0.0.0a0", - "nvidia-nvjitlink", + "nvidia-nvjitlink>=13,<14", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", @@ -94,7 +94,6 @@ requires = [ "libraft==25.12.*,>=0.0.0a0", "librmm==25.12.*,>=0.0.0a0", "ninja", - "nvidia-nvjitlink", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" From e87a8c79f32c2742716eaff708ea9f1c68686732 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Oct 2025 03:04:06 +0000 Subject: [PATCH 015/158] reorder if-else in compiler check --- cpp/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f69726a026..0efe375b51 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -588,10 +588,10 @@ if(NOT BUILD_CPU_ONLY) ) set(JIT_LTO_TARGET_ARCHITECTURE "") - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0) - set(JIT_LTO_TARGET_ARCHITECTURE "70-real") - elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) set(JIT_LTO_TARGET_ARCHITECTURE "75-real") + elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0) + set(JIT_LTO_TARGET_ARCHITECTURE "70-real") endif() set_target_properties( From 32a67bdc079c8ba64a6c8c690b3c4d027bccee2a Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Oct 2025 20:40:12 +0000 Subject: [PATCH 016/158] use cudart apis --- ci/build_wheel.sh | 1 - .../all_cuda-129_arch-aarch64.yaml | 1 - .../all_cuda-129_arch-x86_64.yaml | 1 - .../all_cuda-130_arch-aarch64.yaml | 1 - .../all_cuda-130_arch-x86_64.yaml | 1 - .../bench_ann_cuda-129_arch-aarch64.yaml | 1 - .../bench_ann_cuda-129_arch-x86_64.yaml | 1 - .../bench_ann_cuda-130_arch-aarch64.yaml | 1 - .../bench_ann_cuda-130_arch-x86_64.yaml | 1 - .../go_cuda-129_arch-aarch64.yaml | 1 - .../environments/go_cuda-129_arch-x86_64.yaml | 1 - 
.../go_cuda-130_arch-aarch64.yaml | 1 - .../environments/go_cuda-130_arch-x86_64.yaml | 1 - .../rust_cuda-129_arch-aarch64.yaml | 1 - .../rust_cuda-129_arch-x86_64.yaml | 1 - .../rust_cuda-130_arch-aarch64.yaml | 1 - .../rust_cuda-130_arch-x86_64.yaml | 1 - conda/recipes/libcuvs/recipe.yaml | 4 - cpp/CMakeLists.txt | 17 +- .../neighbors/ivf_flat_baseline_benchmark.cu | 251 ++++++++++++++++++ cpp/bench/neighbors/ivf_flat_jit_benchmark.cu | 249 +++++++++++++++++ .../generate_interleaved_scan_kernels.cmake | 12 +- .../cuvs/detail/jit_lto/AlgorithmLauncher.h | 10 +- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 27 +- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 14 +- dependencies.yaml | 1 - 26 files changed, 531 insertions(+), 71 deletions(-) create mode 100644 cpp/bench/neighbors/ivf_flat_baseline_benchmark.cu create mode 100644 cpp/bench/neighbors/ivf_flat_jit_benchmark.cu diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 305af498ec..48bea18916 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -17,7 +17,6 @@ cd "${package_dir}" EXCLUDE_ARGS=( --exclude "libcublas.so.*" --exclude "libcublasLt.so.*" - --exclude "libcuda.so.*" --exclude "libcurand.so.*" --exclude "libcusolver.so.*" --exclude "libcusparse.so.*" diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 5b2e3bd896..c7d0d8b659 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -12,7 +12,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 979cde0845..0969780718 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -12,7 +12,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- 
cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 985cb902d3..04ccd3496b 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -12,7 +12,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 63102ecb38..1c5d09dda0 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -12,7 +12,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml index dc7b4a0a56..2d11678e7b 100644 --- a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml @@ -12,7 +12,6 @@ dependencies: - click - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml index ec98a44b9f..82ca29dbdd 100644 --- a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml @@ -12,7 +12,6 @@ dependencies: - click - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml index 9d445301dc..45dc071a2f 100644 --- a/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml +++ 
b/conda/environments/bench_ann_cuda-130_arch-aarch64.yaml @@ -12,7 +12,6 @@ dependencies: - click - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml index 14acd0351e..70c32bf03f 100644 --- a/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-130_arch-x86_64.yaml @@ -12,7 +12,6 @@ dependencies: - click - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/go_cuda-129_arch-aarch64.yaml b/conda/environments/go_cuda-129_arch-aarch64.yaml index 9c9f491ddf..45e8f94697 100644 --- a/conda/environments/go_cuda-129_arch-aarch64.yaml +++ b/conda/environments/go_cuda-129_arch-aarch64.yaml @@ -11,7 +11,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/go_cuda-129_arch-x86_64.yaml b/conda/environments/go_cuda-129_arch-x86_64.yaml index b191ea9a9b..ce137edfce 100644 --- a/conda/environments/go_cuda-129_arch-x86_64.yaml +++ b/conda/environments/go_cuda-129_arch-x86_64.yaml @@ -11,7 +11,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/go_cuda-130_arch-aarch64.yaml b/conda/environments/go_cuda-130_arch-aarch64.yaml index 1583536989..c6fd84a0d3 100644 --- a/conda/environments/go_cuda-130_arch-aarch64.yaml +++ b/conda/environments/go_cuda-130_arch-aarch64.yaml @@ -11,7 +11,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/go_cuda-130_arch-x86_64.yaml b/conda/environments/go_cuda-130_arch-x86_64.yaml index 2ca05f88a5..90bf0dc636 100644 --- 
a/conda/environments/go_cuda-130_arch-x86_64.yaml +++ b/conda/environments/go_cuda-130_arch-x86_64.yaml @@ -11,7 +11,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/rust_cuda-129_arch-aarch64.yaml b/conda/environments/rust_cuda-129_arch-aarch64.yaml index 8ce13c4204..216ea42da4 100644 --- a/conda/environments/rust_cuda-129_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-129_arch-aarch64.yaml @@ -10,7 +10,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/rust_cuda-129_arch-x86_64.yaml b/conda/environments/rust_cuda-129_arch-x86_64.yaml index 2dad738655..e9b8726a47 100644 --- a/conda/environments/rust_cuda-129_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-129_arch-x86_64.yaml @@ -10,7 +10,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/rust_cuda-130_arch-aarch64.yaml b/conda/environments/rust_cuda-130_arch-aarch64.yaml index 8a5ac763a8..1c3317bbb6 100644 --- a/conda/environments/rust_cuda-130_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-130_arch-aarch64.yaml @@ -10,7 +10,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/environments/rust_cuda-130_arch-x86_64.yaml b/conda/environments/rust_cuda-130_arch-x86_64.yaml index bdb40cc9a8..bb2d413b94 100644 --- a/conda/environments/rust_cuda-130_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-130_arch-x86_64.yaml @@ -10,7 +10,6 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev -- cuda-driver-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api diff --git a/conda/recipes/libcuvs/recipe.yaml b/conda/recipes/libcuvs/recipe.yaml index 
bd820be51a..4516c4191c 100644 --- a/conda/recipes/libcuvs/recipe.yaml +++ b/conda/recipes/libcuvs/recipe.yaml @@ -102,7 +102,6 @@ outputs: - nccl ${{ nccl_version }} - cuda-version =${{ cuda_version }} - cuda-cudart-dev - - cuda-driver-dev - cuda-profiler-api - libcublas-dev - libcurand-dev @@ -122,7 +121,6 @@ outputs: ignore_run_exports: by_name: - cuda-cudart - - cuda-driver-dev - cuda-version - libaio - libboost @@ -186,7 +184,6 @@ outputs: - libcurand - libcusolver - libcusparse - - libcuda.so.1 - librmm - mkl - nccl @@ -273,7 +270,6 @@ outputs: - libcurand - libcusolver - libcusparse - - libcuda.so.1 - librmm - mkl - nccl diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0efe375b51..e69c4bdd7a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -144,7 +144,6 @@ if(DETECT_CONDA_ENV) ) set(CMAKE_INSTALL_PREFIX "$ENV{CONDA_PREFIX}") endif() - set(CONDA_STUB_PATH "$ENV{CONDA_PREFIX}/lib/stubs") endif() # ################################################################################################## @@ -335,10 +334,6 @@ if(NOT BUILD_CPU_ONLY) ) endif() - set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/FragmentDatabase.cu - src/detail/jit_lto/FragmentEntry.cu - ) - add_library( cuvs_objs OBJECT src/cluster/kmeans_balanced_fit_float.cu @@ -534,7 +529,6 @@ if(NOT BUILD_CPU_ONLY) src/stats/silhouette_score.cu src/stats/trustworthiness_score.cu ${CUVS_MG_ALGOS} - ${JIT_LTO_FILES} ) set_target_properties( @@ -608,7 +602,11 @@ if(NOT BUILD_CPU_ONLY) ) target_link_libraries(jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) - add_library(jit_lto_fatbins_as_cpp_sources STATIC src/detail/jit_lto/AlgorithmPlanner.cu) + add_library( + jit_lto_fatbins_as_cpp_sources STATIC + src/detail/jit_lto/AlgorithmPlanner.cu src/detail/jit_lto/AlgorithmLauncher.cu + src/detail/jit_lto/FragmentDatabase.cu src/detail/jit_lto/FragmentEntry.cu + ) # Set PIC for the static library since it will be linked into a shared library 
set_target_properties( @@ -624,11 +622,6 @@ if(NOT BUILD_CPU_ONLY) "$" ) - target_link_directories( - jit_lto_fatbins_as_cpp_sources PRIVATE $<$:${CONDA_STUB_PATH}> - ) - target_link_libraries(jit_lto_fatbins_as_cpp_sources PRIVATE CUDA::cuda_driver) - embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) # Endian detection diff --git a/cpp/bench/neighbors/ivf_flat_baseline_benchmark.cu b/cpp/bench/neighbors/ivf_flat_baseline_benchmark.cu new file mode 100644 index 0000000000..4ff831a737 --- /dev/null +++ b/cpp/bench/neighbors/ivf_flat_baseline_benchmark.cu @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Baseline benchmark for non-JIT branch (production) +// Runs 21 searches (1 cold + 20 warm) to measure performance without JIT-LTO overhead + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +void print_usage(const char* program_name) +{ + std::cout << "Usage: " << program_name + << " --n_rows --n_dims --n_queries --k [options]\n" + << "\nRequired arguments:\n" + << " --n_rows Number of vectors in the dataset\n" + << " --n_dims Dimensionality of vectors\n" + << " --n_queries Number of query vectors\n" + << " --k Number of neighbors to find\n" + << "\nOptional arguments:\n" + << " --n_lists Number of IVF lists (default: sqrt(n_rows))\n" + << " --n_probes Number of probes during search (default: min(n_lists, 50))\n" + << " --metric Distance metric: l2, inner_product, cosine (default: l2)\n" + << " --help Display this help message\n"; +} + +struct BenchmarkParams { + int64_t n_rows; + int64_t n_dims; + int64_t n_queries; + uint32_t k; + uint32_t n_lists = 0; // 0 means auto-compute + uint32_t n_probes = 0; // 0 means auto-compute + std::string metric = "l2"; + + bool validate() const + { + if (n_rows <= 0 || n_dims <= 0 || n_queries <= 0 || k <= 0) { + std::cerr << "Error: All dimension parameters must be positive\n"; + return false; + } + if (metric != "l2" && metric != "inner_product" && metric != "cosine") { + std::cerr << "Error: Invalid metric. 
Must be l2, inner_product, or cosine\n"; + return false; + } + return true; + } +}; + +bool parse_args(int argc, char** argv, BenchmarkParams& params) +{ + if (argc < 2) { + print_usage(argv[0]); + return false; + } + + for (int i = 1; i < argc; i++) { + std::string arg = argv[i]; + + if (arg == "--help" || arg == "-h") { + print_usage(argv[0]); + return false; + } else if (arg == "--n_rows" && i + 1 < argc) { + params.n_rows = std::stoll(argv[++i]); + } else if (arg == "--n_dims" && i + 1 < argc) { + params.n_dims = std::stoll(argv[++i]); + } else if (arg == "--n_queries" && i + 1 < argc) { + params.n_queries = std::stoll(argv[++i]); + } else if (arg == "--k" && i + 1 < argc) { + params.k = std::stoul(argv[++i]); + } else if (arg == "--n_lists" && i + 1 < argc) { + params.n_lists = std::stoul(argv[++i]); + } else if (arg == "--n_probes" && i + 1 < argc) { + params.n_probes = std::stoul(argv[++i]); + } else if (arg == "--metric" && i + 1 < argc) { + params.metric = argv[++i]; + } else { + std::cerr << "Error: Unknown argument '" << arg << "'\n"; + print_usage(argv[0]); + return false; + } + } + + return params.validate(); +} + +cuvs::distance::DistanceType get_metric_type(const std::string& metric) +{ + if (metric == "l2") { + return cuvs::distance::DistanceType::L2Expanded; + } else if (metric == "inner_product") { + return cuvs::distance::DistanceType::InnerProduct; + } else if (metric == "cosine") { + return cuvs::distance::DistanceType::CosineExpanded; + } + return cuvs::distance::DistanceType::L2Expanded; +} + +int main(int argc, char** argv) +{ + BenchmarkParams params; + + if (!parse_args(argc, argv, params)) { return 1; } + + // Auto-compute n_lists and n_probes if not specified + if (params.n_lists == 0) { + params.n_lists = std::max(1u, static_cast(std::sqrt(params.n_rows))); + } + if (params.n_probes == 0) { params.n_probes = std::min(params.n_lists, 50u); } + + std::cout << "\n=== IVF Flat Baseline Benchmark (No JIT) ===\n"; + std::cout << "Dataset 
size: " << params.n_rows << " x " << params.n_dims << "\n"; + std::cout << "Query size: " << params.n_queries << "\n"; + std::cout << "k: " << params.k << "\n"; + std::cout << "n_lists: " << params.n_lists << "\n"; + std::cout << "n_probes: " << params.n_probes << "\n"; + std::cout << "metric: " << params.metric << "\n"; + std::cout << "============================================\n\n"; + + try { + // Initialize RAFT resources + raft::device_resources handle; + auto stream = raft::resource::get_cuda_stream(handle); + + // Generate random dataset + std::cout << "Generating random dataset...\n"; + auto dataset = raft::make_device_matrix(handle, params.n_rows, params.n_dims); + auto queries = + raft::make_device_matrix(handle, params.n_queries, params.n_dims); + + raft::random::RngState rng(42ULL); + raft::random::uniform( + handle, rng, dataset.data_handle(), params.n_rows * params.n_dims, 0.0f, 1.0f); + raft::random::uniform( + handle, rng, queries.data_handle(), params.n_queries * params.n_dims, 0.0f, 1.0f); + raft::resource::sync_stream(handle); + + // Build index + std::cout << "Building IVF Flat index...\n"; + auto build_start = std::chrono::high_resolution_clock::now(); + + cuvs::neighbors::ivf_flat::index_params index_params; + index_params.n_lists = params.n_lists; + index_params.metric = get_metric_type(params.metric); + index_params.adaptive_centers = false; + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 1.0; + + auto index = cuvs::neighbors::ivf_flat::build( + handle, index_params, raft::make_const_mdspan(dataset.view())); + raft::resource::sync_stream(handle); + + auto build_end = std::chrono::high_resolution_clock::now(); + auto build_time = + std::chrono::duration_cast(build_end - build_start).count(); + std::cout << "Build time: " << build_time << " ms\n\n"; + + // Prepare output buffers + auto neighbors = raft::make_device_matrix(handle, params.n_queries, params.k); + auto distances = 
raft::make_device_matrix(handle, params.n_queries, params.k); + + // Search parameters + cuvs::neighbors::ivf_flat::search_params search_params; + search_params.n_probes = params.n_probes; + + // Run search 21 times (1 cold + 20 warm, like JIT benchmark) + constexpr int num_runs = 21; + std::vector search_times; + search_times.reserve(num_runs); + + std::cout << "\nRunning " << num_runs << " searches (1 cold + 20 warm)...\n"; + + for (int run = 0; run < num_runs; run++) { + // Synchronize before timing + raft::resource::sync_stream(handle); + + auto search_start = std::chrono::high_resolution_clock::now(); + + cuvs::neighbors::ivf_flat::search( + handle, search_params, index, queries.view(), neighbors.view(), distances.view()); + + // Synchronize after search to ensure completion + raft::resource::sync_stream(handle); + + auto search_end = std::chrono::high_resolution_clock::now(); + auto search_time_us = + std::chrono::duration_cast(search_end - search_start).count(); + + search_times.push_back(search_time_us / 1000.0); // Convert to milliseconds + + if (run == 0) { + std::cout << "Run 1 (cold): " << search_times[run] << " ms\n"; + } else { + std::cout << "Run " << (run + 1) << " (warm): " << search_times[run] << " ms\n"; + } + } + + // Calculate statistics + double first_run = search_times[0]; + double warm_total = 0.0; + double min_warm = search_times[1]; + double max_warm = search_times[1]; + + // Accumulate total, min, and max over the warm runs (runs 2-21) + for (int i = 1; i < num_runs; i++) { + warm_total += search_times[i]; + min_warm = std::min(min_warm, search_times[i]); + max_warm = std::max(max_warm, search_times[i]); + } + + double avg_warm_time = warm_total / (num_runs - 1); + double all_runs_avg = (first_run + warm_total) / num_runs; + + std::cout << "\n=== Results ===\n"; + std::cout << "First run (cold): " << first_run << " ms\n"; + std::cout << "Average time (runs 2-21): " << avg_warm_time << " ms\n"; + std::cout << "Min warm time: " << min_warm << " ms\n"; + std::cout << 
"Max warm time: " << max_warm << " ms\n"; + std::cout << "Overall average: " << all_runs_avg << " ms\n"; + std::cout << "Cold run overhead: " << (first_run - avg_warm_time) << " ms\n"; + std::cout << "Throughput (after warmup): " << (params.n_queries / (avg_warm_time / 1000.0)) + << " queries/sec\n"; + std::cout << "===============\n"; + + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } + + return 0; +} diff --git a/cpp/bench/neighbors/ivf_flat_jit_benchmark.cu b/cpp/bench/neighbors/ivf_flat_jit_benchmark.cu new file mode 100644 index 0000000000..99f9e6881e --- /dev/null +++ b/cpp/bench/neighbors/ivf_flat_jit_benchmark.cu @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +void print_usage(const char* program_name) +{ + std::cout << "Usage: " << program_name + << " --n_rows --n_dims --n_queries --k [options]\n" + << "\nRequired arguments:\n" + << " --n_rows Number of vectors in the dataset\n" + << " --n_dims Dimensionality of vectors\n" + << " --n_queries Number of query vectors\n" + << " --k Number of neighbors to find\n" + << "\nOptional arguments:\n" + << " --n_lists Number of IVF lists (default: sqrt(n_rows))\n" + << " --n_probes Number of probes during search (default: min(n_lists, 50))\n" + << " --metric Distance metric: l2, inner_product, cosine (default: l2)\n" + << " --help Display this help message\n"; +} + +struct BenchmarkParams { + int64_t n_rows; + int64_t n_dims; + int64_t n_queries; + uint32_t k; + uint32_t n_lists = 0; // 0 means auto-compute + uint32_t n_probes = 0; // 0 means auto-compute + std::string metric = "l2"; + + bool validate() const + { + if (n_rows <= 0 || n_dims <= 0 || n_queries <= 0 || k <= 0) { + std::cerr << "Error: All dimension parameters must be positive\n"; + return false; + } + if (metric != "l2" && metric != "inner_product" && metric != "cosine") { + std::cerr << "Error: Invalid metric. 
Must be l2, inner_product, or cosine\n"; + return false; + } + return true; + } +}; + +bool parse_args(int argc, char** argv, BenchmarkParams& params) +{ + if (argc < 2) { + print_usage(argv[0]); + return false; + } + + for (int i = 1; i < argc; i++) { + std::string arg = argv[i]; + + if (arg == "--help" || arg == "-h") { + print_usage(argv[0]); + return false; + } else if (arg == "--n_rows" && i + 1 < argc) { + params.n_rows = std::stoll(argv[++i]); + } else if (arg == "--n_dims" && i + 1 < argc) { + params.n_dims = std::stoll(argv[++i]); + } else if (arg == "--n_queries" && i + 1 < argc) { + params.n_queries = std::stoll(argv[++i]); + } else if (arg == "--k" && i + 1 < argc) { + params.k = std::stoul(argv[++i]); + } else if (arg == "--n_lists" && i + 1 < argc) { + params.n_lists = std::stoul(argv[++i]); + } else if (arg == "--n_probes" && i + 1 < argc) { + params.n_probes = std::stoul(argv[++i]); + } else if (arg == "--metric" && i + 1 < argc) { + params.metric = argv[++i]; + } else { + std::cerr << "Error: Unknown argument '" << arg << "'\n"; + print_usage(argv[0]); + return false; + } + } + + return params.validate(); +} + +cuvs::distance::DistanceType get_metric_type(const std::string& metric) +{ + if (metric == "l2") { + return cuvs::distance::DistanceType::L2Expanded; + } else if (metric == "inner_product") { + return cuvs::distance::DistanceType::InnerProduct; + } else if (metric == "cosine") { + return cuvs::distance::DistanceType::CosineExpanded; + } + return cuvs::distance::DistanceType::L2Expanded; +} + +int main(int argc, char** argv) +{ + BenchmarkParams params; + + if (!parse_args(argc, argv, params)) { return 1; } + + // Auto-compute n_lists and n_probes if not specified + if (params.n_lists == 0) { + params.n_lists = std::max(1u, static_cast(std::sqrt(params.n_rows))); + } + if (params.n_probes == 0) { params.n_probes = std::min(params.n_lists, 50u); } + + std::cout << "\n=== IVF Flat JIT LTO Benchmark ===\n"; + std::cout << "Dataset size: " << 
params.n_rows << " x " << params.n_dims << "\n"; + std::cout << "Query size: " << params.n_queries << "\n"; + std::cout << "k: " << params.k << "\n"; + std::cout << "n_lists: " << params.n_lists << "\n"; + std::cout << "n_probes: " << params.n_probes << "\n"; + std::cout << "metric: " << params.metric << "\n"; + std::cout << "==================================\n\n"; + + try { + // Initialize RAFT resources + raft::device_resources handle; + auto stream = raft::resource::get_cuda_stream(handle); + + // Generate random dataset + std::cout << "Generating random dataset...\n"; + auto dataset = raft::make_device_matrix(handle, params.n_rows, params.n_dims); + auto queries = + raft::make_device_matrix(handle, params.n_queries, params.n_dims); + + raft::random::RngState rng(42ULL); + raft::random::uniform( + handle, rng, dataset.data_handle(), params.n_rows * params.n_dims, 0.0f, 1.0f); + raft::random::uniform( + handle, rng, queries.data_handle(), params.n_queries * params.n_dims, 0.0f, 1.0f); + raft::resource::sync_stream(handle); + + // Build index + std::cout << "Building IVF Flat index...\n"; + auto build_start = std::chrono::high_resolution_clock::now(); + + cuvs::neighbors::ivf_flat::index_params index_params; + index_params.n_lists = params.n_lists; + index_params.metric = get_metric_type(params.metric); + index_params.adaptive_centers = false; + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 1.0; + + auto index = cuvs::neighbors::ivf_flat::build( + handle, index_params, raft::make_const_mdspan(dataset.view())); + raft::resource::sync_stream(handle); + + auto build_end = std::chrono::high_resolution_clock::now(); + auto build_time = + std::chrono::duration_cast(build_end - build_start).count(); + std::cout << "Build time: " << build_time << " ms\n\n"; + + // Prepare output buffers + auto neighbors = raft::make_device_matrix(handle, params.n_queries, params.k); + auto distances = raft::make_device_matrix(handle, params.n_queries, 
params.k); + + // Search parameters + cuvs::neighbors::ivf_flat::search_params search_params; + search_params.n_probes = params.n_probes; + + // Run search 21 times: 1 cold + 20 warm + constexpr int num_warmup_runs = 1; + constexpr int num_timed_runs = 20; + constexpr int total_runs = num_warmup_runs + num_timed_runs; + + std::vector search_times; + search_times.reserve(total_runs); + + for (int run = 0; run < total_runs; run++) { + // Synchronize before timing + raft::resource::sync_stream(handle); + + auto search_start = std::chrono::high_resolution_clock::now(); + + cuvs::neighbors::ivf_flat::search( + handle, search_params, index, queries.view(), neighbors.view(), distances.view()); + + // Synchronize after search to ensure completion + raft::resource::sync_stream(handle); + + auto search_end = std::chrono::high_resolution_clock::now(); + auto search_time_us = + std::chrono::duration_cast(search_end - search_start).count(); + + search_times.push_back(search_time_us / 1000.0); // Convert to milliseconds + + if (run == 0) { + std::cout << "Run " << (run + 1) + << " (First run - includes JIT LTO overhead): " << search_times[run] << " ms\n"; + } else { + std::cout << "Run " << (run + 1) << ": " << search_times[run] << " ms\n"; + } + } + + // Calculate statistics + double first_run_time = search_times[0]; + double avg_warmup_time = 0.0; + for (int i = 1; i < total_runs; i++) { + avg_warmup_time += search_times[i]; + } + avg_warmup_time /= num_timed_runs; + + double jit_overhead = first_run_time - avg_warmup_time; + + std::cout << "\n=== Results ===\n"; + std::cout << "First run time (with JIT LTO): " << first_run_time << " ms\n"; + std::cout << "Average time (runs 2-21): " << avg_warmup_time << " ms\n"; + std::cout << "Estimated JIT LTO overhead: " << jit_overhead << " ms\n"; + std::cout << "Throughput (after warmup): " + << (params.n_queries / (avg_warmup_time / 1000.0)) << " queries/sec\n"; + + // Calculate speedup after caching + if (jit_overhead > 0) { + double 
speedup = first_run_time / avg_warmup_time; + std::cout << "Speedup after warmup: " << speedup << "x\n"; + } + + std::cout << "===============\n"; + + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } + + return 0; +} diff --git a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake index 9e15cff5ab..2cc76f8f28 100644 --- a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake +++ b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake @@ -21,20 +21,18 @@ function(generate_interleaved_scan_kernels) ) set(OUTPUT_BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated_kernels) set(CMAKE_LIST_FILE ${OUTPUT_BASE_DIR}/interleaved_scan.cmake) - set(STAMP_FILE ${CMAKE_CURRENT_BINARY_DIR}/kernels_generated.stamp) # Generate the kernels at build time add_custom_command( - OUTPUT ${STAMP_FILE} + OUTPUT ${CMAKE_LIST_FILE} COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} ${OUTPUT_BASE_DIR} - COMMAND ${CMAKE_COMMAND} -E touch ${STAMP_FILE} DEPENDS ${GENERATOR_SCRIPT} COMMENT "Generating interleaved scan kernel files..." 
VERBATIM ) - # Create a custom target that depends on the stamp file - add_custom_target(generate_interleaved_scan_kernels_target DEPENDS ${STAMP_FILE}) + # Create a custom target that depends on the generated CMake file + add_custom_target(generate_interleaved_scan_kernels_target DEPENDS ${CMAKE_LIST_FILE}) # Include the generated CMake list file Only generate if the CMake list file doesn't exist if(NOT EXISTS ${CMAKE_LIST_FILE}) @@ -98,10 +96,6 @@ function(generate_interleaved_scan_kernels) ${FULL_PATH_POST_LAMBDA_FILES} PARENT_SCOPE ) - set(INTERLEAVED_SCAN_KERNELS_STAMP - ${STAMP_FILE} - PARENT_SCOPE - ) set(INTERLEAVED_SCAN_KERNELS_TARGET generate_interleaved_scan_kernels_target PARENT_SCOPE diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h index d567d6e138..ac4807009c 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h @@ -17,18 +17,18 @@ #pragma once #include +#include #include #include #include -#include #include #include struct AlgorithmLauncher { AlgorithmLauncher() = default; - AlgorithmLauncher(CUlibrary l, CUkernel k); + AlgorithmLauncher(cudaLibrary_t l, cudaKernel_t k); template void operator()( @@ -38,12 +38,12 @@ struct AlgorithmLauncher { this->call(stream, grid, block, shared_mem, kernel_args); } - CUkernel get_kernel() { return this->kernel; } + cudaKernel_t get_kernel() { return this->kernel; } private: void call(cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** args); - CUlibrary library; - CUkernel kernel; + cudaLibrary_t library; + cudaKernel_t kernel; }; std::unordered_map& get_cached_launchers(); diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index d3aecac5d8..2e0004f75c 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -18,28 +18,23 @@ #include 
-AlgorithmLauncher::AlgorithmLauncher(CUlibrary l, CUkernel k) : library{l}, kernel{k} {} +AlgorithmLauncher::AlgorithmLauncher(cudaLibrary_t l, cudaKernel_t k) : library{l}, kernel{k} {} void AlgorithmLauncher::call( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) { - CUlaunchAttribute attribute[1]; - attribute[0].id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION; - attribute[0].value.programmaticStreamSerializationAllowed = 1; + cudaLaunchAttribute attribute[1]; + attribute[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; + attribute[0].val.programmaticStreamSerializationAllowed = 1; - CUlaunchConfig config{}; - config.gridDimX = grid.x; - config.gridDimY = grid.y; - config.gridDimZ = grid.z; - config.blockDimX = block.x; - config.blockDimY = block.y; - config.blockDimZ = block.z; - config.sharedMemBytes = shared_mem; - config.hStream = stream; - config.attrs = attribute; - config.numAttrs = 1; + cudaLaunchConfig_t config; + config.gridDim = grid; + config.blockDim = block; + config.stream = stream; + config.attrs = attribute; + config.numAttrs = 1; - cuLaunchKernelEx(&config, (CUfunction)kernel, kernel_args, 0); + cudaLaunchKernelExC(&config, kernel, kernel_args); } std::unordered_map& get_cached_launchers() diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 7507ed7616..8787b52c08 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -25,7 +25,7 @@ #include #include -#include "cuda.h" +#include "cuda_runtime.h" #include "nvJitLink.h" namespace { @@ -90,8 +90,8 @@ AlgorithmLauncher AlgorithmPlanner::build() int major = 0; int minor = 0; cudaGetDevice(&device); - cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device); - cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device); + cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, 
device); + cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device); std::string archs = "-arch=sm_" + std::to_string((major * 10 + minor)); @@ -123,13 +123,13 @@ AlgorithmLauncher AlgorithmPlanner::build() check_nvjitlink_result(handle, result); // cubin is linked, so now load it - CUlibrary library; - cuLibraryLoadData(&library, cubin.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); + cudaLibrary_t library; + cudaLibraryLoadData(&library, cubin.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); unsigned int count = 1; // Still need to cache/compute the mangled name - std::unique_ptr kernels_{new CUkernel[count]}; - cuLibraryEnumerateKernels(kernels_.get(), count, library); + std::unique_ptr kernels_{new cudaKernel_t[count]}; + cudaLibraryEnumerateKernels(kernels_.get(), count, library); return AlgorithmLauncher{library, kernels_[0]}; } diff --git a/dependencies.yaml b/dependencies.yaml index f30e13b09e..03f8927e65 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -316,7 +316,6 @@ dependencies: packages: - cuda-nvtx-dev - cuda-cudart-dev - - cuda-driver-dev - cuda-profiler-api - libcublas-dev - libcurand-dev From a4b48b11b544d70f28ad04f0c812d90e775b52ec Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Oct 2025 21:40:00 +0000 Subject: [PATCH 017/158] attempt to link cudart --- cpp/CMakeLists.txt | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b00af6e2ed..c381d00c9a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -346,6 +346,10 @@ if(NOT BUILD_CPU_ONLY) ) endif() + set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/FragmentDatabase.cu + src/detail/jit_lto/FragmentEntry.cu + ) + add_library( cuvs_objs OBJECT src/cluster/kmeans_balanced_fit_float.cu @@ -541,6 +545,7 @@ if(NOT BUILD_CPU_ONLY) src/stats/silhouette_score.cu src/stats/trustworthiness_score.cu ${CUVS_MG_ALGOS} + ${JIT_LTO_FILES} ) set_target_properties( 
@@ -614,11 +619,7 @@ if(NOT BUILD_CPU_ONLY) ) target_link_libraries(jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) - add_library( - jit_lto_fatbins_as_cpp_sources STATIC - src/detail/jit_lto/AlgorithmPlanner.cu src/detail/jit_lto/AlgorithmLauncher.cu - src/detail/jit_lto/FragmentDatabase.cu src/detail/jit_lto/FragmentEntry.cu - ) + add_library(jit_lto_fatbins_as_cpp_sources STATIC src/detail/jit_lto/AlgorithmPlanner.cu) # Set PIC for the static library since it will be linked into a shared library set_target_properties( @@ -633,6 +634,7 @@ if(NOT BUILD_CPU_ONLY) jit_lto_fatbins_as_cpp_sources PRIVATE "$" "$" ) + target_link_libraries(jit_lto_fatbins_as_cpp_sources PRIVATE CUDA::cudart) embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) @@ -711,6 +713,7 @@ if(NOT BUILD_CPU_ONLY) cuvs-cagra-search $ CUDA::nvJitLink + CUDA::cudart ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries @@ -770,8 +773,11 @@ SECTIONS $> $> $<$:CUDA::nvtx3> - PRIVATE nvidia::cutlass::cutlass $ - $ CUDA::nvJitLink + PRIVATE nvidia::cutlass::cutlass + $ + $ + CUDA::nvJitLink + CUDA::cudart ) endif() From d5d692e405fa8a1014dce2afc30d1bbfdfefc144 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Oct 2025 23:17:05 +0000 Subject: [PATCH 018/158] revert cudart link, try all arch build of jit lto fatbin sources --- cpp/CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c381d00c9a..87bb9316a1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -623,8 +623,12 @@ if(NOT BUILD_CPU_ONLY) # Set PIC for the static library since it will be linked into a shared library set_target_properties( - jit_lto_fatbins_as_cpp_sources PROPERTIES CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} - POSITION_INDEPENDENT_CODE ON + jit_lto_fatbins_as_cpp_sources PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + 
POSITION_INDEPENDENT_CODE ON ) # Make sure the kernels are generated before embedding fatbins @@ -634,7 +638,6 @@ if(NOT BUILD_CPU_ONLY) jit_lto_fatbins_as_cpp_sources PRIVATE "$" "$" ) - target_link_libraries(jit_lto_fatbins_as_cpp_sources PRIVATE CUDA::cudart) embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) @@ -713,7 +716,6 @@ if(NOT BUILD_CPU_ONLY) cuvs-cagra-search $ CUDA::nvJitLink - CUDA::cudart ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries @@ -777,7 +779,6 @@ SECTIONS $ $ CUDA::nvJitLink - CUDA::cudart ) endif() From 1c6dd948ae6611f86bfd5565c887244a6aa25753 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Oct 2025 23:21:13 +0000 Subject: [PATCH 019/158] cmake format --- cpp/CMakeLists.txt | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 87bb9316a1..93a561cafc 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -623,12 +623,12 @@ if(NOT BUILD_CPU_ONLY) # Set PIC for the static library since it will be linked into a shared library set_target_properties( - jit_lto_fatbins_as_cpp_sources PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON + jit_lto_fatbins_as_cpp_sources + PROPERTIES CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON ) # Make sure the kernels are generated before embedding fatbins @@ -775,10 +775,8 @@ SECTIONS $> $> $<$:CUDA::nvtx3> - PRIVATE nvidia::cutlass::cutlass - $ - $ - CUDA::nvJitLink + PRIVATE nvidia::cutlass::cutlass $ + $ CUDA::nvJitLink ) endif() From 30f5ab61897c3ca8b230cf76105acd97ba57e325 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 10 Oct 2025 01:14:12 +0000 Subject: [PATCH 020/158] missing shared mem setting --- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff 
--git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 2e0004f75c..cb623c6bf8 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -16,8 +16,6 @@ #include -#include - AlgorithmLauncher::AlgorithmLauncher(cudaLibrary_t l, cudaKernel_t k) : library{l}, kernel{k} {} void AlgorithmLauncher::call( @@ -28,11 +26,12 @@ void AlgorithmLauncher::call( attribute[0].val.programmaticStreamSerializationAllowed = 1; cudaLaunchConfig_t config; - config.gridDim = grid; - config.blockDim = block; - config.stream = stream; - config.attrs = attribute; - config.numAttrs = 1; + config.gridDim = grid; + config.blockDim = block; + config.stream = stream; + config.attrs = attribute; + config.numAttrs = 1; + config.dynamicSmemBytes = shared_mem; cudaLaunchKernelExC(&config, kernel, kernel_args); } From 9674969e9d7fc78b89bc1c3c264024b0d713caf4 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 22 Oct 2025 21:27:45 +0000 Subject: [PATCH 021/158] separate cuda 12 and 13 compilation --- cpp/CMakeLists.txt | 141 +- .../ivf_flat/ivf_flat_interleaved_scan.cuh | 1237 ++++++++++++++--- ...vf_flat_interleaved_scan_explicit_inst.cuh | 4 + .../ivf_flat_interleaved_scan_jit.cuh | 474 +++++++ .../ivf_flat/jit_lto_kernels/README.md | 9 +- 5 files changed, 1601 insertions(+), 264 deletions(-) create mode 100644 cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 93a561cafc..c1b4e3bbf2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -215,12 +215,21 @@ if(BUILD_CAGRA_HNSWLIB) include(cmake/thirdparty/get_hnswlib.cmake) endif() -# this is needed to embed fatbins to JIT at runtime -include(cmake/modules/embed_fatbins.cmake) +set(JIT_LTO_TARGET_ARCHITECTURE "") +set(JIT_LTO_COMPILATION OFF) +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + set(JIT_LTO_TARGET_ARCHITECTURE "75-real") + set(JIT_LTO_COMPILATION ON) 
+endif() + +if(JIT_LTO_COMPILATION) + # this is needed to embed fatbins to JIT at runtime + include(cmake/modules/embed_fatbins.cmake) -# Generate interleaved scan kernel files at build time -include(cmake/modules/generate_interleaved_scan_kernels.cmake) -generate_interleaved_scan_kernels() + # Generate interleaved scan kernel files at build time + include(cmake/modules/generate_interleaved_scan_kernels.cmake) + generate_interleaved_scan_kernels() +endif() # ################################################################################################## # * cuvs --------------------------------------------------------------------- @@ -545,7 +554,7 @@ if(NOT BUILD_CPU_ONLY) src/stats/silhouette_score.cu src/stats/trustworthiness_score.cu ${CUVS_MG_ALGOS} - ${JIT_LTO_FILES} + $<$:${JIT_LTO_FILES}> ) set_target_properties( @@ -579,67 +588,63 @@ if(NOT BUILD_CPU_ONLY) INTERFACE "$" ) - add_library( - jit_lto_fatbins OBJECT ${INTERLEAVED_SCAN_KERNEL_FILES} ${METRIC_DEVICE_FUNCTION_FILES} - ${FILTER_DEVICE_FUNCTION_FILES} ${POST_LAMBDA_DEVICE_FUNCTION_FILES} - ) - - # Make sure the kernels are generated before we try to build them - add_dependencies(jit_lto_fatbins ${INTERLEAVED_SCAN_KERNELS_TARGET}) + if(JIT_LTO_COMPILATION) + add_library( + jit_lto_fatbins OBJECT ${INTERLEAVED_SCAN_KERNEL_FILES} ${METRIC_DEVICE_FUNCTION_FILES} + ${FILTER_DEVICE_FUNCTION_FILES} ${POST_LAMBDA_DEVICE_FUNCTION_FILES} + ) - target_compile_definitions(jit_lto_fatbins PRIVATE BUILD_KERNEL) - target_include_directories( - jit_lto_fatbins PRIVATE "$" - "$" - ) - target_compile_options(jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size) - target_compile_options( - jit_lto_fatbins PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" - ) + # Make sure the kernels are generated before we try to build them + add_dependencies(jit_lto_fatbins ${INTERLEAVED_SCAN_KERNELS_TARGET}) - set(JIT_LTO_TARGET_ARCHITECTURE "") - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 
13.0) - set(JIT_LTO_TARGET_ARCHITECTURE "75-real") - elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0) - set(JIT_LTO_TARGET_ARCHITECTURE "70-real") - endif() + target_compile_definitions(jit_lto_fatbins PRIVATE BUILD_KERNEL) + target_include_directories( + jit_lto_fatbins PRIVATE "$" + "$" + ) + target_compile_options(jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size) + target_compile_options( + jit_lto_fatbins PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + ) - set_target_properties( - jit_lto_fatbins - PROPERTIES CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION ON - CUDA_FATBIN_COMPILATION ON - POSITION_INDEPENDENT_CODE ON - INTERPROCEDURAL_OPTIMIZATION ON - ) - target_link_libraries(jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) + set_target_properties( + jit_lto_fatbins + PROPERTIES CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + CUDA_SEPARABLE_COMPILATION ON + CUDA_FATBIN_COMPILATION ON + POSITION_INDEPENDENT_CODE ON + INTERPROCEDURAL_OPTIMIZATION ON + ) + target_link_libraries(jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) - add_library(jit_lto_fatbins_as_cpp_sources STATIC src/detail/jit_lto/AlgorithmPlanner.cu) + add_library(jit_lto_fatbins_as_cpp_sources STATIC src/detail/jit_lto/AlgorithmPlanner.cu) - # Set PIC for the static library since it will be linked into a shared library - set_target_properties( - jit_lto_fatbins_as_cpp_sources - PROPERTIES CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - ) + # Set PIC for the static library since it will be linked into a shared library + set_target_properties( + jit_lto_fatbins_as_cpp_sources + PROPERTIES CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 
17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ) - # Make sure the kernels are generated before embedding fatbins - add_dependencies(jit_lto_fatbins_as_cpp_sources ${INTERLEAVED_SCAN_KERNELS_TARGET}) + # Make sure the kernels are generated before embedding fatbins + add_dependencies(jit_lto_fatbins_as_cpp_sources ${INTERLEAVED_SCAN_KERNELS_TARGET}) - target_include_directories( - jit_lto_fatbins_as_cpp_sources PRIVATE "$" - "$" - ) + target_include_directories( + jit_lto_fatbins_as_cpp_sources + PRIVATE "$" + "$" + ) - embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) + embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) + endif() # Endian detection include(TestBigEndian) @@ -711,11 +716,12 @@ if(NOT BUILD_CPU_ONLY) $> $> $<$:CUDA::nvtx3> - PRIVATE nvidia::cutlass::cutlass - $ - cuvs-cagra-search - $ - CUDA::nvJitLink + PRIVATE + nvidia::cutlass::cutlass + $ + cuvs-cagra-search + $<$:$> + $<$:CUDA::nvJitLink> ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries @@ -775,8 +781,11 @@ SECTIONS $> $> $<$:CUDA::nvtx3> - PRIVATE nvidia::cutlass::cutlass $ - $ CUDA::nvJitLink + PRIVATE + nvidia::cutlass::cutlass + $ + $<$:$> + $<$:CUDA::nvJitLink> ) endif() diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index 89acd0cb01..e3286ba548 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -17,9 +17,7 @@ #pragma once #include "../ivf_common.cuh" -#include "jit_lto_kernels/interleaved_scan_planner.hpp" -#include "jit_lto_kernels/interleaved_scan_tags.hpp" -#include +#include "../sample_filter.cuh" #include #include @@ -27,111 +25,955 @@ #include #include #include +#include #include // RAFT_CUDA_TRY +#include +#include #include +#include #include namespace cuvs::neighbors::ivf_flat::detail { -static constexpr int kThreadsPerBlock = 128; - using 
namespace cuvs::spatial::knn::detail; // NOLINT -// Constexpr mapping functions from actual types to tags -template -constexpr auto get_data_type_tag() -{ - if constexpr (std::is_same_v) { return tag_f{}; } - if constexpr (std::is_same_v) { return tag_h{}; } - if constexpr (std::is_same_v) { return tag_sc{}; } - if constexpr (std::is_same_v) { return tag_uc{}; } -} +constexpr int kThreadsPerBlock = 128; -template -constexpr auto get_acc_type_tag() +/** + * @brief Copy `n` elements per block from one place to another. + * + * @param[out] out target pointer (unique per block) + * @param[in] in source pointer + * @param n number of elements to copy + */ +template +__device__ inline void copy_vectorized(T* out, const T* in, uint32_t n) { - if constexpr (std::is_same_v) { return tag_acc_f{}; } - if constexpr (std::is_same_v) { return tag_acc_h{}; } - if constexpr (std::is_same_v) { return tag_acc_i{}; } - if constexpr (std::is_same_v) { return tag_acc_ui{}; } + constexpr int VecElems = VecBytes / sizeof(T); // NOLINT + using align_bytes = raft::Pow2<(size_t)VecBytes>; + if constexpr (VecElems > 1) { + using align_elems = raft::Pow2; + if (!align_bytes::areSameAlignOffsets(out, in)) { + return copy_vectorized<(VecBytes >> 1), T>(out, in, n); + } + { // process unaligned head + uint32_t head = align_bytes::roundUp(in) - in; + if (head > 0) { + copy_vectorized(out, in, head); + n -= head; + in += head; + out += head; + } + } + { // process main part vectorized + using vec_t = typename raft::IOType::Type; + copy_vectorized( + reinterpret_cast(out), reinterpret_cast(in), align_elems::div(n)); + } + { // process unaligned tail + uint32_t tail = align_elems::mod(n); + if (tail > 0) { + n -= tail; + copy_vectorized(out + n, in + n, tail); + } + } + } + if constexpr (VecElems <= 1) { + for (int i = threadIdx.x; i < n; i += blockDim.x) { + out[i] = in[i]; + } + } } -template -constexpr auto get_idx_type_tag() -{ - if constexpr (std::is_same_v) { return tag_idx_l{}; } -} +/** + * 
@brief Load a part of a vector from the index and from query, compute the (part of the) distance + * between them, and aggregate it using the provided Lambda; one structure per thread, per query, + * and per index item. + * + * @tparam kUnroll elements per loop (normally, kUnroll = WarpSize / Veclen) + * @tparam Lambda computing the part of the distance for one dimension and aggregating it: + * void (AccT& acc, AccT x, AccT y) + * @tparam Veclen size of the vectorized load + * @tparam T type of the data in the query and the index + * @tparam AccT type of the accumulated value (an optimization for 8bit values to be loaded as 32bit + * values) + */ +template +struct loadAndComputeDist { + Lambda compute_dist; + AccT& dist; + AccT& norm_query; + AccT& norm_data; -template -constexpr auto get_filter_type_tag() -{ - using namespace cuvs::neighbors::filtering; + __device__ __forceinline__ + loadAndComputeDist(AccT& dist, Lambda op, AccT& norm_query, AccT& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + { + } - // Determine the filter implementation tag - if constexpr (std::is_same_v) { - return tag_filter{}; + /** + * Load parts of vectors from the index and query and accumulates the partial distance. + * This version assumes the query is stored in shared memory. + * Every thread here processes exactly kUnroll * Veclen elements independently of others. 
+ */ + template + __device__ __forceinline__ void runLoadShmemCompute(const T* const& data, + const T* query_shared, + IdxT loadIndex, + IdxT shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + T encV[Veclen]; + raft::ldg(encV, data + (loadIndex + j * kIndexGroupSize) * Veclen); + T queryRegs[Veclen]; + raft::lds(queryRegs, &query_shared[shmemIndex + j * Veclen]); +#pragma unroll + for (int k = 0; k < Veclen; ++k) { + compute_dist(dist, queryRegs[k], encV[k]); + if constexpr (ComputeNorm) { + norm_query += queryRegs[k] * queryRegs[k]; + norm_data += encV[k] * encV[k]; + } + } + } + } + + /** + * Load parts of vectors from the index and query and accumulates the partial distance. + * This version assumes the query is stored in the global memory and is different for every + * thread. One warp loads exactly WarpSize query elements at once and then reshuffles them into + * corresponding threads (`WarpSize / (kUnroll * Veclen)` elements per thread at once). + */ + template + __device__ __forceinline__ void runLoadShflAndCompute(const T*& data, + const T* query, + IdxT baseLoadIndex, + const int lane_id) + { + T queryReg = query[baseLoadIndex + lane_id]; + constexpr int stride = kUnroll * Veclen; + constexpr int totalIter = raft::WarpSize / stride; + constexpr int gmemStride = stride * kIndexGroupSize; +#pragma unroll + for (int i = 0; i < totalIter; ++i, data += gmemStride) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + T encV[Veclen]; + raft::ldg(encV, data + (lane_id + j * kIndexGroupSize) * Veclen); + const int d = (i * kUnroll + j) * Veclen; +#pragma unroll + for (int k = 0; k < Veclen; ++k) { + T q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, encV[k]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += encV[k] * encV[k]; + } + } + } + } } - if constexpr (std::is_same_v>) { - return tag_filter{}; + + /** + * Load parts of vectors from the index and query and accumulates the partial distance. 
+ * This version augments `runLoadShflAndCompute` when `dim` is not a multiple of `WarpSize`. + */ + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const T*& data, const T* query, const int lane_id, const int dim, const int dimBlocks) + { + const int loadDim = dimBlocks + lane_id; + T queryReg = loadDim < dim ? query[loadDim] : T{0}; + const int loadDataIdx = lane_id * Veclen; + for (int d = 0; d < dim - dimBlocks; d += Veclen, data += kIndexGroupSize * Veclen) { + T enc[Veclen]; + raft::ldg(enc, data + loadDataIdx); +#pragma unroll + for (int k = 0; k < Veclen; k++) { + T q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, enc[k]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += enc[k] * enc[k]; + } + } + } } -} +}; -// template -// constexpr auto get_metric_tag() -// { -// // Get tags for T and AccT -// auto t_tag = get_data_type_tag(); -// auto acc_tag = get_acc_type_tag(); - -// // Check for euclidean_dist and return templated tag with tag types -// if constexpr (std::is_same_v>) { -// return tag_metric_euclidean{}; -// } -// // Check for inner_prod_dist and return templated tag with tag types -// if constexpr (std::is_same_v>) { -// return tag_metric_inner_product{}; -// } -// } - -template -constexpr auto get_metric_name() -{ - if constexpr (std::is_same_v>) { - return "euclidean"; +// This handles uint8_t 8, 16 Veclens +template +struct loadAndComputeDist { + Lambda compute_dist; + uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; + + __device__ __forceinline__ + loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + { } - if constexpr (std::is_same_v>) { - return "inner_prod"; + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { + constexpr int veclen_int = uint8_veclen / 
4; // converting uint8_t veclens to int + loadIndex = loadIndex * veclen_int; +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + loadIndex + j * kIndexGroupSize * veclen_int); + uint32_t queryRegs[veclen_int]; + raft::lds(queryRegs, + reinterpret_cast(query_shared + shmemIndex) + j * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + compute_dist(dist, queryRegs[k], encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } + } + } } -} + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + constexpr int veclen_int = uint8_veclen / 4; // converting uint8_t veclens to int + uint32_t queryReg = + (lane_id < 8) ? reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int stride = kUnroll * uint8_veclen; -template -constexpr auto get_filter_name() -{ - if constexpr (std::is_same_v>) { - return "filter_none"; +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + (lane_id + j * kIndexGroupSize) * veclen_int); + const int d = (i * kUnroll + j) * veclen_int; +#pragma unroll + for (int k = 0; k < veclen_int; ++k) { + uint32_t q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } + } + } + } } - if constexpr (std::is_same_v>) { - return "filter_bitset"; + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { 
+ constexpr int veclen_int = uint8_veclen / 4; + const int loadDim = dimBlocks + lane_id * 4; // Here 4 is for 1 - int + uint32_t queryReg = loadDim < dim ? reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; + d += uint8_veclen, data += kIndexGroupSize * uint8_veclen) { + uint32_t enc[veclen_int]; + raft::ldg(enc, reinterpret_cast(data) + lane_id * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + uint32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); + compute_dist(dist, q, enc[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc[k], enc[k], norm_data); + } + } + } + } +}; + +// Keep this specialized uint8 Veclen = 4, because compiler is generating suboptimal code while +// using above common template of int2/int4 +template +struct loadAndComputeDist { + Lambda compute_dist; + uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; + + __device__ __forceinline__ + loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; + uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; + compute_dist(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + uint32_t queryReg = + (lane_id < 8) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int veclen = 4; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; + uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen = 4; + const int loadDim = dimBlocks + lane_id; + uint32_t queryReg = loadDim < dim ? reinterpret_cast(query)[loadDim] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + uint32_t enc = reinterpret_cast(data)[lane_id]; + uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); + compute_dist(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc, enc, norm_data); + } + } + } +}; + +template +struct loadAndComputeDist { + Lambda compute_dist; + uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; + + __device__ __forceinline__ + loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; + uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; + 
compute_dist(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + uint32_t queryReg = + (lane_id < 16) ? reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int veclen = 2; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; + uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen = 2; + int loadDim = dimBlocks + lane_id * veclen; + uint32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + uint32_t enc = reinterpret_cast(data)[lane_id]; + uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); + compute_dist(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc, enc, norm_data); + } + } + } +}; + +template +struct loadAndComputeDist { + Lambda compute_dist; + uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; + + __device__ __forceinline__ + loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = data[loadIndex + j * kIndexGroupSize]; + uint32_t queryRegs = query_shared[shmemIndex + j]; + compute_dist(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query += queryRegs * queryRegs; + norm_data += encV * encV; + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + uint32_t queryReg = query[baseLoadIndex + lane_id]; + constexpr int veclen = 1; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = data[lane_id + j * kIndexGroupSize]; + uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += encV * encV; + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + 
const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen = 1; + int loadDim = dimBlocks + lane_id; + uint32_t queryReg = loadDim < dim ? query[loadDim] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + uint32_t enc = data[lane_id]; + uint32_t q = raft::shfl(queryReg, d, raft::WarpSize); + compute_dist(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += enc * enc; + } + } + } +}; + +// This device function is for int8 veclens 4, 8 and 16 +template +struct loadAndComputeDist { + Lambda compute_dist; + int32_t& dist; + int32_t& norm_query; + int32_t& norm_data; + + __device__ __forceinline__ + loadAndComputeDist(int32_t& dist, Lambda op, int32_t& norm_query, int32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + { } -} -template -constexpr auto get_post_lambda_name() + __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, + const int8_t* query_shared, + int loadIndex, + int shmemIndex) + { + constexpr int veclen_int = int8_veclen / 4; // converting int8_t veclens to int + +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + (loadIndex + j * kIndexGroupSize) * veclen_int); + int32_t queryRegs[veclen_int]; + raft::lds(queryRegs, + reinterpret_cast(query_shared + shmemIndex) + j * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + compute_dist(dist, queryRegs[k], encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, + const int8_t* query, + int baseLoadIndex, + const int lane_id) + { + constexpr int veclen_int = int8_veclen / 4; // converting int8_t veclens to int + + int32_t 
queryReg = + (lane_id < 8) ? reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int stride = kUnroll * int8_veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + (lane_id + j * kIndexGroupSize) * veclen_int); + const int d = (i * kUnroll + j) * veclen_int; +#pragma unroll + for (int k = 0; k < veclen_int; ++k) { + int32_t q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) + { + constexpr int veclen_int = int8_veclen / 4; + const int loadDim = dimBlocks + lane_id * 4; // Here 4 is for 1 - int; + int32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; d += int8_veclen, data += kIndexGroupSize * int8_veclen) { + int32_t enc[veclen_int]; + raft::ldg(enc, reinterpret_cast(data) + lane_id * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + int32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); // Here 4 is for 1 - int; + compute_dist(dist, q, enc[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc[k], enc[k], norm_data); + } + } + } + } +}; + +template +struct loadAndComputeDist { + Lambda compute_dist; + int32_t& dist; + int32_t& norm_query; + int32_t& norm_data; + __device__ __forceinline__ + loadAndComputeDist(int32_t& dist, Lambda op, int32_t& norm_query, int32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + { + } + __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, + const int8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; + int32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; + compute_dist(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, + const int8_t* query, + int baseLoadIndex, + const int lane_id) + { + int32_t queryReg = + (lane_id < 16) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int veclen = 2; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; + int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryReg, queryReg, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) + { + constexpr int veclen = 2; + int loadDim = dimBlocks + lane_id * veclen; + int32_t queryReg = loadDim < dim ? reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + int32_t enc = reinterpret_cast(data + lane_id * veclen)[0]; + int32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); + compute_dist(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc, enc, norm_data); + } + } + } +}; + +template +struct loadAndComputeDist { + Lambda compute_dist; + int32_t& dist; + int32_t& norm_query; + int32_t& norm_data; + __device__ __forceinline__ + loadAndComputeDist(int32_t& dist, Lambda op, int32_t& norm_query, int32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, + const int8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + compute_dist(dist, query_shared[shmemIndex + j], data[loadIndex + j * kIndexGroupSize]); + if constexpr (ComputeNorm) { + norm_query += 
int32_t{query_shared[shmemIndex + j]} * int32_t{query_shared[shmemIndex + j]}; + norm_data += int32_t{data[loadIndex + j * kIndexGroupSize]} * + int32_t{data[loadIndex + j * kIndexGroupSize]}; + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, + const int8_t* query, + int baseLoadIndex, + const int lane_id) + { + constexpr int veclen = 1; + constexpr int stride = kUnroll * veclen; + int32_t queryReg = query[baseLoadIndex + lane_id]; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, data[lane_id + j * kIndexGroupSize]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += data[lane_id + j * kIndexGroupSize] * data[lane_id + j * kIndexGroupSize]; + } + } + } + } + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) + { + constexpr int veclen = 1; + const int loadDim = dimBlocks + lane_id; + int32_t queryReg = loadDim < dim ? 
query[loadDim] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + int32_t q = raft::shfl(queryReg, d, raft::WarpSize); + compute_dist(dist, q, data[lane_id]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += int32_t{data[lane_id]} * int32_t{data[lane_id]}; + } + } + } +}; + +// switch to dummy blocksort when Capacity is 0 this explicit dummy is chosen +// to support access to warpsort constants like ::queue_t::kDummy +template +struct flat_block_sort { + using type = raft::matrix::detail::select::warpsort::block_sort< + raft::matrix::detail::select::warpsort::warp_sort_filtered, + Capacity, + Ascending, + T, + IdxT>; +}; + +template +struct flat_block_sort<0, Ascending, T, IdxT> + : ivf::detail::dummy_block_sort_t { + using type = ivf::detail::dummy_block_sort_t; +}; + +template +using block_sort_t = typename flat_block_sort::type; + +/** + * Scan clusters for nearest neighbors of the query vectors. + * See `ivfflat_interleaved_scan` for more information. + * + * The clusters are stored in the interleaved index format described in ivf_flat_types.hpp. + * For each query vector, a set of clusters is probed: the distance to each vector in the cluster is + * calculated, and the top-k nearest neighbors are selected. + * + * @param compute_dist distance function + * @param query_smem_elems number of dimensions of the query vector to fit in a shared memory of a + * block; this number must be a multiple of `WarpSize * Veclen`. 
+ * @param[in] query a pointer to all queries in a row-major contiguous format [gridDim.y, dim] + * @param[in] coarse_index a pointer to the cluster indices to search through [n_probes] + * @param[in] list_indices index.indices + * @param[in] list_data index.data + * @param[in] list_sizes index.list_sizes + * @param[in] list_offsets index.list_offsets + * @param n_probes + * @param k + * @param dim + * @param sample_filter + * @param[out] neighbors + * @param[out] distances + */ +template +RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) + interleaved_scan_kernel(Lambda compute_dist, + PostLambda post_process, + const uint32_t query_smem_elems, + const T* query, + const uint32_t* coarse_index, + const T* const* list_data_ptrs, + const uint32_t* list_sizes, + const uint32_t queries_offset, + const uint32_t n_probes, + const uint32_t k, + const uint32_t max_samples, + const uint32_t* chunk_indices, + const uint32_t dim, + IvfSampleFilterT sample_filter, + uint32_t* neighbors, + float* distances) { - if constexpr (std::is_same_v) { return "post_identity"; } - if constexpr (std::is_same_v) { return "post_sqrt"; } - if constexpr (std::is_same_v) { return "post_compose"; } + extern __shared__ __align__(256) uint8_t interleaved_scan_kernel_smem[]; + constexpr bool kManageLocalTopK = Capacity > 0; + // Using shared memory for the (part of the) query; + // This allows to save on global memory bandwidth when reading index and query + // data at the same time. + // Its size is `query_smem_elems`. 
+ T* query_shared = reinterpret_cast(interleaved_scan_kernel_smem); + // Make the query input and output point to this block's shared query + { + const int query_id = blockIdx.y; + query += query_id * dim; + if constexpr (kManageLocalTopK) { + neighbors += query_id * k * gridDim.x + blockIdx.x * k; + distances += query_id * k * gridDim.x + blockIdx.x * k; + } else { + distances += query_id * uint64_t(max_samples); + } + chunk_indices += (n_probes * query_id); + coarse_index += query_id * n_probes; + } + + // Copy a part of the query into shared memory for faster processing + copy_vectorized(query_shared, query, std::min(dim, query_smem_elems)); + __syncthreads(); + + using local_topk_t = block_sort_t; + local_topk_t queue(k); + { + using align_warp = raft::Pow2; + const int lane_id = align_warp::mod(threadIdx.x); + + // How many full warps needed to compute the distance (without remainder) + const uint32_t full_warps_along_dim = align_warp::roundDown(dim); + + const uint32_t shm_assisted_dim = + (dim > query_smem_elems) ? query_smem_elems : full_warps_along_dim; + + // Every CUDA block scans one cluster at a time. + for (int probe_id = blockIdx.x; probe_id < n_probes; probe_id += gridDim.x) { + const uint32_t list_id = coarse_index[probe_id]; // The id of cluster(list) + + // The number of vectors in each cluster(list); [nlist] + const uint32_t list_length = list_sizes[list_id]; + + // The number of interleaved groups to be processed + const uint32_t num_groups = + align_warp::div(list_length + align_warp::Mask); // ceildiv by power of 2 + + uint32_t sample_offset = 0; + if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; } + assert(list_length == chunk_indices[probe_id] - sample_offset); + assert(sample_offset + list_length <= max_samples); + + constexpr int kUnroll = raft::WarpSize / Veclen; + constexpr uint32_t kNumWarps = kThreadsPerBlock / raft::WarpSize; + // Every warp reads WarpSize vectors and computes the distances to them. 
+ // Then, the distances and corresponding ids are distributed among the threads, + // and each thread adds one (id, dist) pair to the filtering queue. + for (uint32_t group_id = align_warp::div(threadIdx.x); group_id < num_groups; + group_id += kNumWarps) { + AccT dist = 0; + AccT norm_query = 0; + AccT norm_dataset = 0; + // This is where this warp begins reading data (start position of an interleaved group) + const T* data = list_data_ptrs[list_id] + (group_id * kIndexGroupSize) * dim; + + // This is the vector a given lane/thread handles + const uint32_t vec_id = group_id * raft::WarpSize + lane_id; + const bool valid = + vec_id < list_length && sample_filter(queries_offset + blockIdx.y, list_id, vec_id); + + if (valid) { + // Process first shm_assisted_dim dimensions (always using shared memory) + loadAndComputeDist lc( + dist, compute_dist, norm_query, norm_dataset); + for (int pos = 0; pos < shm_assisted_dim; + pos += raft::WarpSize, data += kIndexGroupSize * raft::WarpSize) { + lc.runLoadShmemCompute(data, query_shared, lane_id, pos); + } + + if (dim > query_smem_elems) { + // The default path - using shfl ops - for dimensions beyond query_smem_elems + loadAndComputeDist lc( + dist, compute_dist, norm_query, norm_dataset); + for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += raft::WarpSize) { + lc.runLoadShflAndCompute(data, query, pos, lane_id); + } + lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim); + } else { + // when shm_assisted_dim == full_warps_along_dim < dim + loadAndComputeDist<1, decltype(compute_dist), Veclen, T, AccT, ComputeNorm> lc( + dist, compute_dist, norm_query, norm_dataset); + for (int pos = full_warps_along_dim; pos < dim; + pos += Veclen, data += kIndexGroupSize * Veclen) { + lc.runLoadShmemCompute(data, query_shared, lane_id, pos); + } + } + } + + // Enqueue one element per thread + float val = valid ? 
static_cast(dist) : local_topk_t::queue_t::kDummy; + + if constexpr (ComputeNorm) { + if (valid) + val = val / (raft::sqrt(static_cast(norm_query)) * + raft::sqrt(static_cast(norm_dataset))); + } + if constexpr (kManageLocalTopK) { + queue.add(val, sample_offset + vec_id); + } else { + if (vec_id < list_length) distances[sample_offset + vec_id] = val; + } + } + + // fill up unused slots for current query + if constexpr (!kManageLocalTopK) { + if (probe_id + 1 == n_probes) { + for (uint32_t i = threadIdx.x + sample_offset + list_length; i < max_samples; + i += blockDim.x) { + distances[i] = local_topk_t::queue_t::kDummy; + } + } + } + } + } + + // finalize and store selected neighbours + if constexpr (kManageLocalTopK) { + __syncthreads(); + queue.done(interleaved_scan_kernel_smem); + queue.store(distances, neighbors, post_process); + } } /** * Configure the gridDim.x to maximize GPU occupancy, but reduce the output size */ -// template -inline uint32_t configure_launch_x(uint32_t numQueries, - uint32_t n_probes, - int32_t sMemSize, - CUkernel func) +template +uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMemSize, T func) { int dev_id; RAFT_CUDA_TRY(cudaGetDevice(&dev_id)); @@ -153,10 +995,12 @@ template -void launch_kernel(const index& index, + typename IvfSampleFilterT, + typename Lambda, + typename PostLambda> +void launch_kernel(Lambda lambda, + PostLambda post_process, + const index& index, const T* queries, const uint32_t* coarse_index, const uint32_t num_queries, @@ -165,10 +1009,7 @@ void launch_kernel(const index& index, const uint32_t k, const uint32_t max_samples, const uint32_t* chunk_indices, - IdxT* const* const inds_ptrs, - cuda::std::optional bitset_ptr, - cuda::std::optional bitset_len, - cuda::std::optional original_nbits, + IvfSampleFilterT sample_filter, uint32_t* neighbors, float* distances, uint32_t& grid_dim_x, @@ -176,19 +1017,16 @@ void launch_kernel(const index& index, { RAFT_EXPECTS(Veclen == index.veclen(), 
"Configured Veclen does not match the index interleaving pattern."); - - // Use tag types for the planner to avoid template bloat - auto kernel_planner = InterleavedScanPlanner()), - decltype(get_acc_type_tag()), - decltype(get_idx_type_tag())>( - Capacity, Veclen, Ascending, ComputeNorm); - kernel_planner.template add_metric_device_function()), - decltype(get_acc_type_tag())>( - get_metric_name(), Veclen); - kernel_planner.add_filter_device_function(get_filter_name()); - kernel_planner.add_post_lambda_device_function(get_post_lambda_name()); - auto kernel_launcher = kernel_planner.get_launcher(); - + constexpr auto kKernel = interleaved_scan_kernel; const int max_query_smem = 16384; int query_smem_elems = std::min(max_query_smem / sizeof(T), raft::Pow2::roundUp(index.dim())); @@ -206,8 +1044,7 @@ void launch_kernel(const index& index, constexpr uint32_t kMaxGridY = 32768; if (grid_dim_x == 0) { - grid_dim_x = configure_launch_x( - std::min(kMaxGridY, num_queries), n_probes, smem_size, kernel_launcher.get_kernel()); + grid_dim_x = configure_launch_x(std::min(kMaxGridY, num_queries), n_probes, smem_size, kKernel); return; } @@ -223,44 +1060,22 @@ void launch_kernel(const index& index, block_dim.x, n_probes, smem_size); - // kKernel<<>>(lambda, - // post_process, - // query_smem_elems, - // queries, - // coarse_index, - // index.data_ptrs().data_handle(), - // index.list_sizes().data_handle(), - // queries_offset + query_offset, - // n_probes, - // k, - // max_samples, - // chunk_indices, - // index.dim(), - // sample_filter, - // neighbors, - // distances); - kernel_launcher(stream, - grid_dim, - block_dim, - smem_size, - query_smem_elems, - queries, - coarse_index, - index.data_ptrs().data_handle(), - index.list_sizes().data_handle(), - queries_offset + query_offset, - n_probes, - k, - max_samples, - chunk_indices, - index.dim(), - // sample_filter, - inds_ptrs, - bitset_ptr.value_or(nullptr), - bitset_len.value_or(0), - original_nbits.value_or(0), - neighbors, - 
distances); + kKernel<<>>(lambda, + post_process, + query_smem_elems, + queries, + coarse_index, + index.data_ptrs().data_handle(), + index.list_sizes().data_handle(), + queries_offset + query_offset, + n_probes, + k, + max_samples, + chunk_indices, + index.dim(), + sample_filter, + neighbors, + distances); queries += grid_dim_y * index.dim(); if constexpr (Capacity > 0) { neighbors += grid_dim_y * grid_dim_x * k; @@ -273,6 +1088,59 @@ void launch_kernel(const index& index, } } +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) + { + const auto diff = x - y; + acc += diff * diff; + } +}; + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(uint32_t& acc, uint32_t x, uint32_t y) + { + if constexpr (Veclen > 1) { + const auto diff = __vabsdiffu4(x, y); + acc = raft::dp4a(diff, diff, acc); + } else { + const auto diff = __usad(x, y, 0u); + acc += diff * diff; + } + } +}; + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(int32_t& acc, int32_t x, int32_t y) + { + if constexpr (Veclen > 1) { + // Note that we enforce here that the unsigned version of dp4a is used, because the difference + // between two int8 numbers can be greater than 127 and therefore represented as a negative + // number in int8. Casting from int8 to int32 would yield incorrect results, while casting + // from uint8 to uint32 is correct. + const auto diff = __vabsdiffs4(x, y); + acc = raft::dp4a(diff, diff, static_cast(acc)); + } else { + const auto diff = x - y; + acc += diff * diff; + } + } +}; + +template +struct inner_prod_dist { + __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) + { + if constexpr (Veclen > 1 && (std::is_same_v || std::is_same_v)) { + acc = raft::dp4a(x, y, acc); + } else { + acc += x * y; + } + } +}; + /** Select the distance computation function and forward the rest of the arguments. 
*/ template void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... args) { @@ -294,9 +1162,9 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... arg T, AccT, IdxT, - IvfSampleFilterTag, - tag_metric_euclidean, - tag_post_identity>(std::forward(args)...); + IvfSampleFilterT, + euclidean_dist, + raft::identity_op>({}, {}, std::forward(args)...); case cuvs::distance::DistanceType::L2SqrtExpanded: case cuvs::distance::DistanceType::L2SqrtUnexpanded: return launch_kernel, - tag_post_sqrt>(std::forward(args)...); + IvfSampleFilterT, + euclidean_dist, + raft::sqrt_op>({}, {}, std::forward(args)...); case cuvs::distance::DistanceType::InnerProduct: return launch_kernel, - tag_post_identity>(std::forward(args)...); + IvfSampleFilterT, + inner_prod_dist, + raft::identity_op>({}, {}, std::forward(args)...); case cuvs::distance::DistanceType::CosineExpanded: // NB: "Ascending" is reversed because the post-processing step is done after that sort return launch_kernel, - tag_post_compose>( + IvfSampleFilterT, + inner_prod_dist>( + {}, + raft::compose_op(raft::add_const_op{1.0f}, raft::mul_const_op{-1.0f}), std::forward(args)...); // NB: update the description of `knn::ivf_flat::build` when // adding here a new metric. default: RAFT_FAIL("The chosen distance metric is not supported (%d)", int(metric)); @@ -345,7 +1214,7 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... 
arg template (1, 16 / sizeof(T))> struct select_interleaved_scan_kernel { @@ -360,7 +1229,7 @@ struct select_interleaved_scan_kernel { { if constexpr (Capacity > 0) { if (k_max == 0 || k_max > Capacity) { - return select_interleaved_scan_kernel::run( + return select_interleaved_scan_kernel::run( k_max, veclen, select_min, std::forward(args)...); } } @@ -369,7 +1238,7 @@ struct select_interleaved_scan_kernel { return select_interleaved_scan_kernel::run(k_max, veclen, @@ -379,7 +1248,7 @@ struct select_interleaved_scan_kernel { } if constexpr (Veclen > 1) { if (veclen % Veclen != 0) { - return select_interleaved_scan_kernel::run( + return select_interleaved_scan_kernel::run( k_max, 1, select_min, std::forward(args)...); } } @@ -393,10 +1262,10 @@ struct select_interleaved_scan_kernel { veclen == Veclen, "Veclen must be power-of-two not bigger than the maximum allowed size for this data type."); if (select_min) { - launch_with_fixed_consts( + launch_with_fixed_consts( std::forward(args)...); } else { - launch_with_fixed_consts( + launch_with_fixed_consts( std::forward(args)...); } } @@ -453,38 +1322,26 @@ void ivfflat_interleaved_scan(const index& index, { const int capacity = raft::bound_by_power_of_two(k); - cuda::std::optional bitset_ptr; - cuda::std::optional bitset_len; - cuda::std::optional original_nbits; - - if constexpr (std::is_same_v>) { - bitset_ptr = sample_filter.view().data(); - bitset_len = sample_filter.view().size(); - original_nbits = sample_filter.view().get_original_nbits(); - } - select_interleaved_scan_kernel())>:: - run(capacity, - index.veclen(), - select_min, - metric, - index, - queries, - coarse_query_results, - n_queries, - queries_offset, - n_probes, - k, - max_samples, - chunk_indices, - index.inds_ptrs().data_handle(), - bitset_ptr, - bitset_len, - original_nbits, - neighbors, - distances, - grid_dim_x, - stream); + auto filter_adapter = cuvs::neighbors::filtering::ivf_to_sample_filter( + index.inds_ptrs().data_handle(), sample_filter); 
+ select_interleaved_scan_kernel::run(capacity, + index.veclen(), + select_min, + metric, + index, + queries, + coarse_query_results, + n_queries, + queries_offset, + n_probes, + k, + max_samples, + chunk_indices, + filter_adapter, + neighbors, + distances, + grid_dim_x, + stream); } } // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh index 3265a622b9..b1116c7ced 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh @@ -17,7 +17,11 @@ #pragma once #include "../detail/ann_utils.cuh" +#if CUDART_VERSION >= 13000 +#include "ivf_flat_interleaved_scan_jit.cuh" +#else #include "ivf_flat_interleaved_scan.cuh" +#endif #include #include #include diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh new file mode 100644 index 0000000000..3311fe43b4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2022-2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "../ivf_common.cuh" +#include "jit_lto_kernels/interleaved_scan_planner.hpp" +#include "jit_lto_kernels/interleaved_scan_tags.hpp" +#include +#include +#include + +#include "../detail/ann_utils.cuh" +#include +#include +#include +#include // RAFT_CUDA_TRY +#include + +#include + +namespace cuvs::neighbors::ivf_flat::detail { + +static constexpr int kThreadsPerBlock = 128; + +using namespace cuvs::spatial::knn::detail; // NOLINT + +// Constexpr mapping functions from actual types to tags +template +constexpr auto get_data_type_tag() +{ + if constexpr (std::is_same_v) { return tag_f{}; } + if constexpr (std::is_same_v) { return tag_h{}; } + if constexpr (std::is_same_v) { return tag_sc{}; } + if constexpr (std::is_same_v) { return tag_uc{}; } +} + +template +constexpr auto get_acc_type_tag() +{ + if constexpr (std::is_same_v) { return tag_acc_f{}; } + if constexpr (std::is_same_v) { return tag_acc_h{}; } + if constexpr (std::is_same_v) { return tag_acc_i{}; } + if constexpr (std::is_same_v) { return tag_acc_ui{}; } +} + +template +constexpr auto get_idx_type_tag() +{ + if constexpr (std::is_same_v) { return tag_idx_l{}; } +} + +template +constexpr auto get_filter_type_tag() +{ + using namespace cuvs::neighbors::filtering; + + // Determine the filter implementation tag + if constexpr (std::is_same_v) { + return tag_filter{}; + } + if constexpr (std::is_same_v>) { + return tag_filter{}; + } +} + +// template +// constexpr auto get_metric_tag() +// { +// // Get tags for T and AccT +// auto t_tag = get_data_type_tag(); +// auto acc_tag = get_acc_type_tag(); + +// // Check for euclidean_dist and return templated tag with tag types +// if constexpr (std::is_same_v>) { +// return tag_metric_euclidean{}; +// } +// // Check for inner_prod_dist and return templated tag with tag types +// if constexpr (std::is_same_v>) { +// return tag_metric_inner_product{}; +// } +// } + +template +constexpr auto get_metric_name() +{ + if constexpr 
(std::is_same_v>) { + return "euclidean"; + } + if constexpr (std::is_same_v>) { + return "inner_prod"; + } +} + +template +constexpr auto get_filter_name() +{ + if constexpr (std::is_same_v>) { + return "filter_none"; + } + if constexpr (std::is_same_v>) { + return "filter_bitset"; + } +} + +template +constexpr auto get_post_lambda_name() +{ + if constexpr (std::is_same_v) { return "post_identity"; } + if constexpr (std::is_same_v) { return "post_sqrt"; } + if constexpr (std::is_same_v) { return "post_compose"; } +} + +/** + * Configure the gridDim.x to maximize GPU occupancy, but reduce the output size + */ +// template +inline uint32_t configure_launch_x(uint32_t numQueries, + uint32_t n_probes, + int32_t sMemSize, + CUkernel func) +{ + int dev_id; + RAFT_CUDA_TRY(cudaGetDevice(&dev_id)); + int num_sms; + RAFT_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id)); + int num_blocks_per_sm = 0; + RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks_per_sm, func, kThreadsPerBlock, sMemSize)); + + size_t min_grid_size = num_sms * num_blocks_per_sm; + size_t min_grid_x = raft::ceildiv(min_grid_size, numQueries); + return min_grid_x > n_probes ? 
n_probes : static_cast(min_grid_x); +} + +template +void launch_kernel(const index& index, + const T* queries, + const uint32_t* coarse_index, + const uint32_t num_queries, + const uint32_t queries_offset, + const uint32_t n_probes, + const uint32_t k, + const uint32_t max_samples, + const uint32_t* chunk_indices, + IdxT* const* const inds_ptrs, + cuda::std::optional bitset_ptr, + cuda::std::optional bitset_len, + cuda::std::optional original_nbits, + uint32_t* neighbors, + float* distances, + uint32_t& grid_dim_x, + rmm::cuda_stream_view stream) +{ + RAFT_EXPECTS(Veclen == index.veclen(), + "Configured Veclen does not match the index interleaving pattern."); + + // Use tag types for the planner to avoid template bloat + auto kernel_planner = InterleavedScanPlanner()), + decltype(get_acc_type_tag()), + decltype(get_idx_type_tag())>( + Capacity, Veclen, Ascending, ComputeNorm); + kernel_planner.template add_metric_device_function()), + decltype(get_acc_type_tag())>( + get_metric_name(), Veclen); + kernel_planner.add_filter_device_function(get_filter_name()); + kernel_planner.add_post_lambda_device_function(get_post_lambda_name()); + auto kernel_launcher = kernel_planner.get_launcher(); + + const int max_query_smem = 16384; + int query_smem_elems = std::min(max_query_smem / sizeof(T), + raft::Pow2::roundUp(index.dim())); + int smem_size = query_smem_elems * sizeof(T); + + if constexpr (Capacity > 0) { + constexpr int kSubwarpSize = std::min(Capacity, raft::WarpSize); + auto block_merge_mem = + raft::matrix::detail::select::warpsort::calc_smem_size_for_block_wide( + kThreadsPerBlock / kSubwarpSize, k); + smem_size += std::max(smem_size, block_merge_mem); + } + + // power-of-two less than cuda limit (for better addr alignment) + constexpr uint32_t kMaxGridY = 32768; + + if (grid_dim_x == 0) { + grid_dim_x = configure_launch_x( + std::min(kMaxGridY, num_queries), n_probes, smem_size, kernel_launcher.get_kernel()); + return; + } + + for (uint32_t query_offset = 0; 
query_offset < num_queries; query_offset += kMaxGridY) { + uint32_t grid_dim_y = std::min(kMaxGridY, num_queries - query_offset); + dim3 grid_dim(grid_dim_x, grid_dim_y, 1); + dim3 block_dim(kThreadsPerBlock); + RAFT_LOG_TRACE( + "Launching the ivf-flat interleaved_scan_kernel (%d, %d, 1) x (%d, 1, 1), n_probes = %d, " + "smem_size = %d", + grid_dim.x, + grid_dim.y, + block_dim.x, + n_probes, + smem_size); + kernel_launcher(stream, + grid_dim, + block_dim, + smem_size, + query_smem_elems, + queries, + coarse_index, + index.data_ptrs().data_handle(), + index.list_sizes().data_handle(), + queries_offset + query_offset, + n_probes, + k, + max_samples, + chunk_indices, + index.dim(), + // sample_filter, + inds_ptrs, + bitset_ptr.value_or(nullptr), + bitset_len.value_or(0), + original_nbits.value_or(0), + neighbors, + distances); + queries += grid_dim_y * index.dim(); + if constexpr (Capacity > 0) { + neighbors += grid_dim_y * grid_dim_x * k; + distances += grid_dim_y * grid_dim_x * k; + } else { + distances += grid_dim_y * max_samples; + } + chunk_indices += grid_dim_y * n_probes; + coarse_index += grid_dim_y * n_probes; + } +} + +/** Select the distance computation function and forward the rest of the arguments. */ +template +void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... 
args) +{ + switch (metric) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2Unexpanded: + return launch_kernel, + tag_post_identity>(std::forward(args)...); + case cuvs::distance::DistanceType::L2SqrtExpanded: + case cuvs::distance::DistanceType::L2SqrtUnexpanded: + return launch_kernel, + tag_post_sqrt>(std::forward(args)...); + case cuvs::distance::DistanceType::InnerProduct: + return launch_kernel, + tag_post_identity>(std::forward(args)...); + case cuvs::distance::DistanceType::CosineExpanded: + // NB: "Ascending" is reversed because the post-processing step is done after that sort + return launch_kernel, + tag_post_compose>( + std::forward(args)...); // NB: update the description of `knn::ivf_flat::build` when + // adding here a new metric. + default: RAFT_FAIL("The chosen distance metric is not supported (%d)", int(metric)); + } +} + +/** + * Lift the `capacity` and `veclen` parameters to the template level, + * forward the rest of the arguments unmodified to `launch_interleaved_scan_kernel`. + */ +template (1, 16 / sizeof(T))> +struct select_interleaved_scan_kernel { + /** + * Recursively reduce the `Capacity` and `Veclen` parameters until they match the + * corresponding runtime arguments. + * By default, this recursive process starts with maximum possible values of the + * two parameters and ends with both values equal to 1. + */ + template + static inline void run(int k_max, int veclen, bool select_min, Args&&... 
args) + { + if constexpr (Capacity > 0) { + if (k_max == 0 || k_max > Capacity) { + return select_interleaved_scan_kernel::run( + k_max, veclen, select_min, std::forward(args)...); + } + } + if constexpr (Capacity > 1) { + if (k_max * 2 <= Capacity) { + return select_interleaved_scan_kernel::run(k_max, + veclen, + select_min, + std::forward(args)...); + } + } + if constexpr (Veclen > 1) { + if (veclen % Veclen != 0) { + return select_interleaved_scan_kernel::run( + k_max, 1, select_min, std::forward(args)...); + } + } + // NB: this is the limitation of the warpsort structures that use a huge number of + // registers (used in the main kernel here). + RAFT_EXPECTS(Capacity == 0 || k_max == Capacity, + "Capacity must be either 0 or a power-of-two not bigger than the maximum " + "allowed size matrix::detail::select::warpsort::kMaxCapacity (%d).", + raft::matrix::detail::select::warpsort::kMaxCapacity); + RAFT_EXPECTS( + veclen == Veclen, + "Veclen must be power-of-two not bigger than the maximum allowed size for this data type."); + if (select_min) { + launch_with_fixed_consts( + std::forward(args)...); + } else { + launch_with_fixed_consts( + std::forward(args)...); + } + } +}; + +/** + * @brief Configure and launch an appropriate template instance of the interleaved scan kernel. + * + * @tparam T value type + * @tparam AccT accumulated type + * @tparam IdxT type of the indices + * + * @param index previously built ivf-flat index + * @param[in] queries device pointer to the query vectors [batch_size, dim] + * @param[in] coarse_query_results device pointer to the cluster (list) ids [batch_size, n_probes] + * @param n_queries batch size + * @param[in] queries_offset + * An offset of the current query batch. It is used for feeding sample_filter with the + * correct query index. + * @param metric type of the measured distance + * @param n_probes number of nearest clusters to query + * @param k number of nearest neighbors. 
+ * NB: the maximum value of `k` is limited statically by `kMaxCapacity`. + * @param select_min whether to select nearest (true) or furthest (false) points w.r.t. the given + * metric. + * @param[out] neighbors device pointer to the result indices for each query and cluster + * [batch_size, grid_dim_x, k] + * @param[out] distances device pointer to the result distances for each query and cluster + * [batch_size, grid_dim_x, k] + * @param[inout] grid_dim_x number of blocks launched across all n_probes clusters; + * (one block processes one or more probes, hence: 1 <= grid_dim_x <= n_probes) + * @param stream + * @param sample_filter + * A filter that selects samples for a given query. Use an instance of none_sample_filter to + * provide a green light for every sample. + */ +template +void ivfflat_interleaved_scan(const index& index, + const T* queries, + const uint32_t* coarse_query_results, + const uint32_t n_queries, + const uint32_t queries_offset, + const cuvs::distance::DistanceType metric, + const uint32_t n_probes, + const uint32_t k, + const uint32_t max_samples, + const uint32_t* chunk_indices, + const bool select_min, + IvfSampleFilterT sample_filter, + uint32_t* neighbors, + float* distances, + uint32_t& grid_dim_x, + rmm::cuda_stream_view stream) +{ + const int capacity = raft::bound_by_power_of_two(k); + + cuda::std::optional bitset_ptr; + cuda::std::optional bitset_len; + cuda::std::optional original_nbits; + + if constexpr (std::is_same_v>) { + bitset_ptr = sample_filter.view().data(); + bitset_len = sample_filter.view().size(); + original_nbits = sample_filter.view().get_original_nbits(); + } + select_interleaved_scan_kernel())>:: + run(capacity, + index.veclen(), + select_min, + metric, + index, + queries, + coarse_query_results, + n_queries, + queries_offset, + n_probes, + k, + max_samples, + chunk_indices, + index.inds_ptrs().data_handle(), + bitset_ptr, + bitset_len, + original_nbits, + neighbors, + distances, + grid_dim_x, + stream); +} + +} // 
namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md index 92a34c9512..29434c49be 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md @@ -67,14 +67,7 @@ Filename: interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu Each generated `.cu` file contains: 1. **Apache 2.0 License Header** -2. **Include**: `#include "../ivf_flat_interleaved_scan.cuh"` +2. **Include**: `#include "../ivf_flat_interleaved_scan_jit.cuh"` 3. **Conditional compilation**: - `#ifdef BUILD_KERNEL`: Template instantiation - `#else`: Registration function for JIT/LTO system - -## Notes - -- All files are generated in the same directory as the script -- The script automatically creates CMake files with all generated filenames -- Progress is printed every 100 files during generation -- Files are sorted alphabetically in the CMake lists From db9a4872575f030ebade7babf2127bd35ff77d57 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 22 Oct 2025 21:33:30 +0000 Subject: [PATCH 022/158] remove bench --- .../neighbors/ivf_flat_baseline_benchmark.cu | 251 ------------------ cpp/bench/neighbors/ivf_flat_jit_benchmark.cu | 249 ----------------- 2 files changed, 500 deletions(-) delete mode 100644 cpp/bench/neighbors/ivf_flat_baseline_benchmark.cu delete mode 100644 cpp/bench/neighbors/ivf_flat_jit_benchmark.cu diff --git a/cpp/bench/neighbors/ivf_flat_baseline_benchmark.cu b/cpp/bench/neighbors/ivf_flat_baseline_benchmark.cu deleted file mode 100644 index 4ff831a737..0000000000 --- a/cpp/bench/neighbors/ivf_flat_baseline_benchmark.cu +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Baseline benchmark for non-JIT branch (production) -// Runs 3 searches to measure performance without JIT-LTO overhead - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -void print_usage(const char* program_name) -{ - std::cout << "Usage: " << program_name - << " --n_rows --n_dims --n_queries --k [options]\n" - << "\nRequired arguments:\n" - << " --n_rows Number of vectors in the dataset\n" - << " --n_dims Dimensionality of vectors\n" - << " --n_queries Number of query vectors\n" - << " --k Number of neighbors to find\n" - << "\nOptional arguments:\n" - << " --n_lists Number of IVF lists (default: sqrt(n_rows))\n" - << " --n_probes Number of probes during search (default: min(n_lists, 50))\n" - << " --metric Distance metric: l2, inner_product, cosine (default: l2)\n" - << " --help Display this help message\n"; -} - -struct BenchmarkParams { - int64_t n_rows; - int64_t n_dims; - int64_t n_queries; - uint32_t k; - uint32_t n_lists = 0; // 0 means auto-compute - uint32_t n_probes = 0; // 0 means auto-compute - std::string metric = "l2"; - - bool validate() const - { - if (n_rows <= 0 || n_dims <= 0 || n_queries <= 0 || k <= 0) { - std::cerr << "Error: All dimension parameters must be positive\n"; - return false; - } - if (metric != "l2" && metric != "inner_product" && metric != "cosine") { - std::cerr << "Error: Invalid metric. 
Must be l2, inner_product, or cosine\n"; - return false; - } - return true; - } -}; - -bool parse_args(int argc, char** argv, BenchmarkParams& params) -{ - if (argc < 2) { - print_usage(argv[0]); - return false; - } - - for (int i = 1; i < argc; i++) { - std::string arg = argv[i]; - - if (arg == "--help" || arg == "-h") { - print_usage(argv[0]); - return false; - } else if (arg == "--n_rows" && i + 1 < argc) { - params.n_rows = std::stoll(argv[++i]); - } else if (arg == "--n_dims" && i + 1 < argc) { - params.n_dims = std::stoll(argv[++i]); - } else if (arg == "--n_queries" && i + 1 < argc) { - params.n_queries = std::stoll(argv[++i]); - } else if (arg == "--k" && i + 1 < argc) { - params.k = std::stoul(argv[++i]); - } else if (arg == "--n_lists" && i + 1 < argc) { - params.n_lists = std::stoul(argv[++i]); - } else if (arg == "--n_probes" && i + 1 < argc) { - params.n_probes = std::stoul(argv[++i]); - } else if (arg == "--metric" && i + 1 < argc) { - params.metric = argv[++i]; - } else { - std::cerr << "Error: Unknown argument '" << arg << "'\n"; - print_usage(argv[0]); - return false; - } - } - - return params.validate(); -} - -cuvs::distance::DistanceType get_metric_type(const std::string& metric) -{ - if (metric == "l2") { - return cuvs::distance::DistanceType::L2Expanded; - } else if (metric == "inner_product") { - return cuvs::distance::DistanceType::InnerProduct; - } else if (metric == "cosine") { - return cuvs::distance::DistanceType::CosineExpanded; - } - return cuvs::distance::DistanceType::L2Expanded; -} - -int main(int argc, char** argv) -{ - BenchmarkParams params; - - if (!parse_args(argc, argv, params)) { return 1; } - - // Auto-compute n_lists and n_probes if not specified - if (params.n_lists == 0) { - params.n_lists = std::max(1u, static_cast(std::sqrt(params.n_rows))); - } - if (params.n_probes == 0) { params.n_probes = std::min(params.n_lists, 50u); } - - std::cout << "\n=== IVF Flat Baseline Benchmark (No JIT) ===\n"; - std::cout << "Dataset 
size: " << params.n_rows << " x " << params.n_dims << "\n"; - std::cout << "Query size: " << params.n_queries << "\n"; - std::cout << "k: " << params.k << "\n"; - std::cout << "n_lists: " << params.n_lists << "\n"; - std::cout << "n_probes: " << params.n_probes << "\n"; - std::cout << "metric: " << params.metric << "\n"; - std::cout << "============================================\n\n"; - - try { - // Initialize RAFT resources - raft::device_resources handle; - auto stream = raft::resource::get_cuda_stream(handle); - - // Generate random dataset - std::cout << "Generating random dataset...\n"; - auto dataset = raft::make_device_matrix(handle, params.n_rows, params.n_dims); - auto queries = - raft::make_device_matrix(handle, params.n_queries, params.n_dims); - - raft::random::RngState rng(42ULL); - raft::random::uniform( - handle, rng, dataset.data_handle(), params.n_rows * params.n_dims, 0.0f, 1.0f); - raft::random::uniform( - handle, rng, queries.data_handle(), params.n_queries * params.n_dims, 0.0f, 1.0f); - raft::resource::sync_stream(handle); - - // Build index - std::cout << "Building IVF Flat index...\n"; - auto build_start = std::chrono::high_resolution_clock::now(); - - cuvs::neighbors::ivf_flat::index_params index_params; - index_params.n_lists = params.n_lists; - index_params.metric = get_metric_type(params.metric); - index_params.adaptive_centers = false; - index_params.add_data_on_build = true; - index_params.kmeans_trainset_fraction = 1.0; - - auto index = cuvs::neighbors::ivf_flat::build( - handle, index_params, raft::make_const_mdspan(dataset.view())); - raft::resource::sync_stream(handle); - - auto build_end = std::chrono::high_resolution_clock::now(); - auto build_time = - std::chrono::duration_cast(build_end - build_start).count(); - std::cout << "Build time: " << build_time << " ms\n\n"; - - // Prepare output buffers - auto neighbors = raft::make_device_matrix(handle, params.n_queries, params.k); - auto distances = 
raft::make_device_matrix(handle, params.n_queries, params.k); - - // Search parameters - cuvs::neighbors::ivf_flat::search_params search_params; - search_params.n_probes = params.n_probes; - - // Run search 21 times (1 cold + 20 warm, like JIT benchmark) - constexpr int num_runs = 21; - std::vector search_times; - search_times.reserve(num_runs); - - std::cout << "\nRunning " << num_runs << " searches (1 cold + 20 warm)...\n"; - - for (int run = 0; run < num_runs; run++) { - // Synchronize before timing - raft::resource::sync_stream(handle); - - auto search_start = std::chrono::high_resolution_clock::now(); - - cuvs::neighbors::ivf_flat::search( - handle, search_params, index, queries.view(), neighbors.view(), distances.view()); - - // Synchronize after search to ensure completion - raft::resource::sync_stream(handle); - - auto search_end = std::chrono::high_resolution_clock::now(); - auto search_time_us = - std::chrono::duration_cast(search_end - search_start).count(); - - search_times.push_back(search_time_us / 1000.0); // Convert to milliseconds - - if (run == 0) { - std::cout << "Run 1 (cold): " << search_times[run] << " ms\n"; - } else { - std::cout << "Run " << (run + 1) << " (warm): " << search_times[run] << " ms\n"; - } - } - - // Calculate statistics - double first_run = search_times[0]; - double warm_total = 0.0; - double min_warm = search_times[1]; - double max_warm = search_times[1]; - - // Average of runs 2-4 (warm runs) - for (int i = 1; i < num_runs; i++) { - warm_total += search_times[i]; - min_warm = std::min(min_warm, search_times[i]); - max_warm = std::max(max_warm, search_times[i]); - } - - double avg_warm_time = warm_total / (num_runs - 1); - double all_runs_avg = (first_run + warm_total) / num_runs; - - std::cout << "\n=== Results ===\n"; - std::cout << "First run (cold): " << first_run << " ms\n"; - std::cout << "Average time (runs 2-21): " << avg_warm_time << " ms\n"; - std::cout << "Min warm time: " << min_warm << " ms\n"; - std::cout << 
"Max warm time: " << max_warm << " ms\n"; - std::cout << "Overall average: " << all_runs_avg << " ms\n"; - std::cout << "Cold run overhead: " << (first_run - avg_warm_time) << " ms\n"; - std::cout << "Throughput (after warmup): " << (params.n_queries / (avg_warm_time / 1000.0)) - << " queries/sec\n"; - std::cout << "===============\n"; - - } catch (const std::exception& e) { - std::cerr << "Error: " << e.what() << "\n"; - return 1; - } - - return 0; -} diff --git a/cpp/bench/neighbors/ivf_flat_jit_benchmark.cu b/cpp/bench/neighbors/ivf_flat_jit_benchmark.cu deleted file mode 100644 index 99f9e6881e..0000000000 --- a/cpp/bench/neighbors/ivf_flat_jit_benchmark.cu +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -void print_usage(const char* program_name) -{ - std::cout << "Usage: " << program_name - << " --n_rows --n_dims --n_queries --k [options]\n" - << "\nRequired arguments:\n" - << " --n_rows Number of vectors in the dataset\n" - << " --n_dims Dimensionality of vectors\n" - << " --n_queries Number of query vectors\n" - << " --k Number of neighbors to find\n" - << "\nOptional arguments:\n" - << " --n_lists Number of IVF lists (default: sqrt(n_rows))\n" - << " --n_probes Number of probes during search (default: min(n_lists, 50))\n" - << " --metric Distance metric: l2, inner_product, cosine (default: l2)\n" - << " --help Display this help message\n"; -} - -struct BenchmarkParams { - int64_t n_rows; - int64_t n_dims; - int64_t n_queries; - uint32_t k; - uint32_t n_lists = 0; // 0 means auto-compute - uint32_t n_probes = 0; // 0 means auto-compute - std::string metric = "l2"; - - bool validate() const - { - if (n_rows <= 0 || n_dims <= 0 || n_queries <= 0 || k <= 0) { - std::cerr << "Error: All dimension parameters must be positive\n"; - return false; - } - if (metric != "l2" && metric != "inner_product" && metric != "cosine") { - std::cerr << "Error: Invalid metric. 
Must be l2, inner_product, or cosine\n"; - return false; - } - return true; - } -}; - -bool parse_args(int argc, char** argv, BenchmarkParams& params) -{ - if (argc < 2) { - print_usage(argv[0]); - return false; - } - - for (int i = 1; i < argc; i++) { - std::string arg = argv[i]; - - if (arg == "--help" || arg == "-h") { - print_usage(argv[0]); - return false; - } else if (arg == "--n_rows" && i + 1 < argc) { - params.n_rows = std::stoll(argv[++i]); - } else if (arg == "--n_dims" && i + 1 < argc) { - params.n_dims = std::stoll(argv[++i]); - } else if (arg == "--n_queries" && i + 1 < argc) { - params.n_queries = std::stoll(argv[++i]); - } else if (arg == "--k" && i + 1 < argc) { - params.k = std::stoul(argv[++i]); - } else if (arg == "--n_lists" && i + 1 < argc) { - params.n_lists = std::stoul(argv[++i]); - } else if (arg == "--n_probes" && i + 1 < argc) { - params.n_probes = std::stoul(argv[++i]); - } else if (arg == "--metric" && i + 1 < argc) { - params.metric = argv[++i]; - } else { - std::cerr << "Error: Unknown argument '" << arg << "'\n"; - print_usage(argv[0]); - return false; - } - } - - return params.validate(); -} - -cuvs::distance::DistanceType get_metric_type(const std::string& metric) -{ - if (metric == "l2") { - return cuvs::distance::DistanceType::L2Expanded; - } else if (metric == "inner_product") { - return cuvs::distance::DistanceType::InnerProduct; - } else if (metric == "cosine") { - return cuvs::distance::DistanceType::CosineExpanded; - } - return cuvs::distance::DistanceType::L2Expanded; -} - -int main(int argc, char** argv) -{ - BenchmarkParams params; - - if (!parse_args(argc, argv, params)) { return 1; } - - // Auto-compute n_lists and n_probes if not specified - if (params.n_lists == 0) { - params.n_lists = std::max(1u, static_cast(std::sqrt(params.n_rows))); - } - if (params.n_probes == 0) { params.n_probes = std::min(params.n_lists, 50u); } - - std::cout << "\n=== IVF Flat JIT LTO Benchmark ===\n"; - std::cout << "Dataset size: " << 
params.n_rows << " x " << params.n_dims << "\n"; - std::cout << "Query size: " << params.n_queries << "\n"; - std::cout << "k: " << params.k << "\n"; - std::cout << "n_lists: " << params.n_lists << "\n"; - std::cout << "n_probes: " << params.n_probes << "\n"; - std::cout << "metric: " << params.metric << "\n"; - std::cout << "==================================\n\n"; - - try { - // Initialize RAFT resources - raft::device_resources handle; - auto stream = raft::resource::get_cuda_stream(handle); - - // Generate random dataset - std::cout << "Generating random dataset...\n"; - auto dataset = raft::make_device_matrix(handle, params.n_rows, params.n_dims); - auto queries = - raft::make_device_matrix(handle, params.n_queries, params.n_dims); - - raft::random::RngState rng(42ULL); - raft::random::uniform( - handle, rng, dataset.data_handle(), params.n_rows * params.n_dims, 0.0f, 1.0f); - raft::random::uniform( - handle, rng, queries.data_handle(), params.n_queries * params.n_dims, 0.0f, 1.0f); - raft::resource::sync_stream(handle); - - // Build index - std::cout << "Building IVF Flat index...\n"; - auto build_start = std::chrono::high_resolution_clock::now(); - - cuvs::neighbors::ivf_flat::index_params index_params; - index_params.n_lists = params.n_lists; - index_params.metric = get_metric_type(params.metric); - index_params.adaptive_centers = false; - index_params.add_data_on_build = true; - index_params.kmeans_trainset_fraction = 1.0; - - auto index = cuvs::neighbors::ivf_flat::build( - handle, index_params, raft::make_const_mdspan(dataset.view())); - raft::resource::sync_stream(handle); - - auto build_end = std::chrono::high_resolution_clock::now(); - auto build_time = - std::chrono::duration_cast(build_end - build_start).count(); - std::cout << "Build time: " << build_time << " ms\n\n"; - - // Prepare output buffers - auto neighbors = raft::make_device_matrix(handle, params.n_queries, params.k); - auto distances = raft::make_device_matrix(handle, params.n_queries, 
params.k); - - // Search parameters - cuvs::neighbors::ivf_flat::search_params search_params; - search_params.n_probes = params.n_probes; - - // Run search 21 times: 1 cold + 20 warm - constexpr int num_warmup_runs = 1; - constexpr int num_timed_runs = 20; - constexpr int total_runs = num_warmup_runs + num_timed_runs; - - std::vector search_times; - search_times.reserve(total_runs); - - for (int run = 0; run < total_runs; run++) { - // Synchronize before timing - raft::resource::sync_stream(handle); - - auto search_start = std::chrono::high_resolution_clock::now(); - - cuvs::neighbors::ivf_flat::search( - handle, search_params, index, queries.view(), neighbors.view(), distances.view()); - - // Synchronize after search to ensure completion - raft::resource::sync_stream(handle); - - auto search_end = std::chrono::high_resolution_clock::now(); - auto search_time_us = - std::chrono::duration_cast(search_end - search_start).count(); - - search_times.push_back(search_time_us / 1000.0); // Convert to milliseconds - - if (run == 0) { - std::cout << "Run " << (run + 1) - << " (First run - includes JIT LTO overhead): " << search_times[run] << " ms\n"; - } else { - std::cout << "Run " << (run + 1) << ": " << search_times[run] << " ms\n"; - } - } - - // Calculate statistics - double first_run_time = search_times[0]; - double avg_warmup_time = 0.0; - for (int i = 1; i < total_runs; i++) { - avg_warmup_time += search_times[i]; - } - avg_warmup_time /= num_timed_runs; - - double jit_overhead = first_run_time - avg_warmup_time; - - std::cout << "\n=== Results ===\n"; - std::cout << "First run time (with JIT LTO): " << first_run_time << " ms\n"; - std::cout << "Average time (runs 2-21): " << avg_warmup_time << " ms\n"; - std::cout << "Estimated JIT LTO overhead: " << jit_overhead << " ms\n"; - std::cout << "Throughput (after warmup): " - << (params.n_queries / (avg_warmup_time / 1000.0)) << " queries/sec\n"; - - // Calculate speedup after caching - if (jit_overhead > 0) { - double 
speedup = first_run_time / avg_warmup_time; - std::cout << "Speedup after warmup: " << speedup << "x\n"; - } - - std::cout << "===============\n"; - - } catch (const std::exception& e) { - std::cerr << "Error: " << e.what() << "\n"; - return 1; - } - - return 0; -} From aa9294f7296040d6828a9105ebff127c8bae4180 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 22 Oct 2025 21:40:30 +0000 Subject: [PATCH 023/158] c include directory --- cpp/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index abf50c5142..21a90d743a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -598,6 +598,7 @@ if(NOT BUILD_CPU_ONLY) target_include_directories( jit_lto_fatbins PRIVATE "$" "$" + "$" ) target_compile_options(jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size) target_compile_options( @@ -638,6 +639,7 @@ if(NOT BUILD_CPU_ONLY) jit_lto_fatbins_as_cpp_sources PRIVATE "$" "$" + "$" ) embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) From 2eb77fef3ce8d35994477009194841b8360391b5 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 22 Oct 2025 21:45:21 +0000 Subject: [PATCH 024/158] style check --- cpp/CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 21a90d743a..dbc1b9a5b7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -596,9 +596,10 @@ if(NOT BUILD_CPU_ONLY) target_compile_definitions(jit_lto_fatbins PRIVATE BUILD_KERNEL) target_include_directories( - jit_lto_fatbins PRIVATE "$" - "$" - "$" + jit_lto_fatbins + PRIVATE "$" + "$" + "$" ) target_compile_options(jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size) target_compile_options( From 3e35b991afd0ce8f32716d982eeae7d3d83baf49 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 23 Oct 2025 22:01:48 +0000 Subject: [PATCH 025/158] guard cuda calls and use shared_ptr --- .../cuvs/detail/jit_lto/AlgorithmLauncher.h | 10 ++-- 
.../cuvs/detail/jit_lto/AlgorithmPlanner.h | 4 +- .../cuvs/detail/jit_lto/FragmentDatabase.h | 8 ---- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 10 ++-- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 24 +++++----- .../ivf_flat_interleaved_scan_jit.cuh | 46 +++++++++---------- 6 files changed, 48 insertions(+), 54 deletions(-) diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h index ac4807009c..ad01e28510 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h @@ -23,16 +23,15 @@ #include #include -#include +#include struct AlgorithmLauncher { AlgorithmLauncher() = default; - AlgorithmLauncher(cudaLibrary_t l, cudaKernel_t k); + AlgorithmLauncher(cudaKernel_t k); template - void operator()( - cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, Args&&... args) + void dispatch(cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, Args&&... 
args) { void* kernel_args[] = {const_cast(static_cast(&args))...}; this->call(stream, grid, block, shared_mem, kernel_args); @@ -42,8 +41,7 @@ struct AlgorithmLauncher { private: void call(cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** args); - cudaLibrary_t library; cudaKernel_t kernel; }; -std::unordered_map& get_cached_launchers(); +std::unordered_map>& get_cached_launchers(); diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h index 00c5516b45..c8e23122e8 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h @@ -26,7 +26,7 @@ struct FragmentEntry; struct AlgorithmPlanner { AlgorithmPlanner(std::string const& n, std::string const& p) : entrypoint(n + "_" + p) {} - AlgorithmLauncher get_launcher(); + std::shared_ptr get_launcher(); std::string entrypoint; std::vector device_functions; @@ -36,5 +36,5 @@ struct AlgorithmPlanner { void add_entrypoint(); void add_device_functions(); std::string get_device_functions_key(); - AlgorithmLauncher build(); + std::shared_ptr build(); }; diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h index 5a46c92a4e..3e4b6703e2 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h @@ -23,14 +23,6 @@ #include "FragmentEntry.h" #include "MakeFragmentKey.h" -struct NRTCLTOFragmentCompiler; - -// struct PerEntryCachedInfo { -// std::unordered_set, FragmentEntryHash, -// FragmentEntryEqual> -// entries; -// }; - class FragmentDatabase { public: FragmentDatabase(FragmentDatabase const&) = delete; diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index cb623c6bf8..56b854263d 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -16,7 +16,9 @@ 
#include -AlgorithmLauncher::AlgorithmLauncher(cudaLibrary_t l, cudaKernel_t k) : library{l}, kernel{k} {} +#include + +AlgorithmLauncher::AlgorithmLauncher(cudaKernel_t k) : kernel{k} {} void AlgorithmLauncher::call( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) @@ -33,11 +35,11 @@ void AlgorithmLauncher::call( config.numAttrs = 1; config.dynamicSmemBytes = shared_mem; - cudaLaunchKernelExC(&config, kernel, kernel_args); + RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); } -std::unordered_map& get_cached_launchers() +std::unordered_map>& get_cached_launchers() { - static std::unordered_map launchers; + static std::unordered_map> launchers; return launchers; } diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 8787b52c08..1561af0e52 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -28,6 +28,8 @@ #include "cuda_runtime.h" #include "nvJitLink.h" +#include + namespace { // We can make a better RAII wrapper around nvjitlinkhandle void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) @@ -71,12 +73,11 @@ std::string AlgorithmPlanner::get_device_functions_key() return key; } -AlgorithmLauncher AlgorithmPlanner::get_launcher() +std::shared_ptr AlgorithmPlanner::get_launcher() { auto& launchers = get_cached_launchers(); auto launch_key = this->entrypoint + this->get_device_functions_key(); if (launchers.count(launch_key) == 0) { - auto start = std::chrono::high_resolution_clock::now(); add_entrypoint(); add_device_functions(); launchers[launch_key] = this->build(); @@ -84,14 +85,14 @@ AlgorithmLauncher AlgorithmPlanner::get_launcher() return launchers[launch_key]; } -AlgorithmLauncher AlgorithmPlanner::build() +std::shared_ptr AlgorithmPlanner::build() { int device = 0; int major = 0; int minor = 0; - cudaGetDevice(&device); - cudaDeviceGetAttribute(&major, 
cudaDevAttrComputeCapabilityMajor, device); - cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device); + RAFT_CUDA_TRY(cudaGetDevice(&device)); + RAFT_CUDA_TRY(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device)); + RAFT_CUDA_TRY(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device)); std::string archs = "-arch=sm_" + std::to_string((major * 10 + minor)); @@ -124,12 +125,13 @@ AlgorithmLauncher AlgorithmPlanner::build() // cubin is linked, so now load it cudaLibrary_t library; - cudaLibraryLoadData(&library, cubin.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); + RAFT_CUDA_TRY( + cudaLibraryLoadData(&library, cubin.get(), nullptr, nullptr, 0, nullptr, nullptr, 0)); - unsigned int count = 1; + constexpr unsigned int count = 1; // Still need to cache/compute the mangled name - std::unique_ptr kernels_{new cudaKernel_t[count]}; - cudaLibraryEnumerateKernels(kernels_.get(), count, library); + std::unique_ptr kernels{new cudaKernel_t[count]}; + RAFT_CUDA_TRY(cudaLibraryEnumerateKernels(kernels.get(), count, library)); - return AlgorithmLauncher{library, kernels_[0]}; + return std::make_shared(kernels.release()[0]); } diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index 3311fe43b4..e50ca51254 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -207,7 +207,7 @@ void launch_kernel(const index& index, if (grid_dim_x == 0) { grid_dim_x = configure_launch_x( - std::min(kMaxGridY, num_queries), n_probes, smem_size, kernel_launcher.get_kernel()); + std::min(kMaxGridY, num_queries), n_probes, smem_size, kernel_launcher->get_kernel()); return; } @@ -223,28 +223,28 @@ void launch_kernel(const index& index, block_dim.x, n_probes, smem_size); - kernel_launcher(stream, - grid_dim, - block_dim, - smem_size, - query_smem_elems, - queries, - 
coarse_index, - index.data_ptrs().data_handle(), - index.list_sizes().data_handle(), - queries_offset + query_offset, - n_probes, - k, - max_samples, - chunk_indices, - index.dim(), - // sample_filter, - inds_ptrs, - bitset_ptr.value_or(nullptr), - bitset_len.value_or(0), - original_nbits.value_or(0), - neighbors, - distances); + kernel_launcher->dispatch(stream, + grid_dim, + block_dim, + smem_size, + query_smem_elems, + queries, + coarse_index, + index.data_ptrs().data_handle(), + index.list_sizes().data_handle(), + queries_offset + query_offset, + n_probes, + k, + max_samples, + chunk_indices, + index.dim(), + // sample_filter, + inds_ptrs, + bitset_ptr.value_or(nullptr), + bitset_len.value_or(0), + original_nbits.value_or(0), + neighbors, + distances); queries += grid_dim_y * index.dim(); if constexpr (Capacity > 0) { neighbors += grid_dim_y * grid_dim_x * k; From d0ff62c0d442acffe40b276ed46718f620178854 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 23 Oct 2025 22:45:59 +0000 Subject: [PATCH 026/158] add AlgorithmPlanner to main target --- cpp/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index dbc1b9a5b7..404346f44f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -351,8 +351,8 @@ if(NOT BUILD_CPU_ONLY) ) endif() - set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/FragmentDatabase.cu - src/detail/jit_lto/FragmentEntry.cu + set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/AlgorithmPlanner.cu + src/detail/jit_lto/FragmentDatabase.cu src/detail/jit_lto/FragmentEntry.cu ) add_library( @@ -621,7 +621,7 @@ if(NOT BUILD_CPU_ONLY) ) target_link_libraries(jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) - add_library(jit_lto_fatbins_as_cpp_sources STATIC src/detail/jit_lto/AlgorithmPlanner.cu) + add_library(jit_lto_fatbins_as_cpp_sources STATIC) # Set PIC for the static library since it will be linked into a 
shared library set_target_properties( From 445a6c42101a0a7649f0b62d056769b6f8775bb3 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 23 Oct 2025 23:28:38 +0000 Subject: [PATCH 027/158] remove nvjitlink as cuda 12 dep --- .../environments/all_cuda-129_arch-aarch64.yaml | 1 - conda/environments/all_cuda-129_arch-x86_64.yaml | 1 - .../bench_ann_cuda-129_arch-aarch64.yaml | 1 - .../bench_ann_cuda-129_arch-x86_64.yaml | 1 - conda/environments/go_cuda-129_arch-aarch64.yaml | 1 - conda/environments/go_cuda-129_arch-x86_64.yaml | 1 - .../environments/rust_cuda-129_arch-aarch64.yaml | 1 - .../environments/rust_cuda-129_arch-x86_64.yaml | 1 - conda/recipes/libcuvs/recipe.yaml | 16 ++++++++++++---- dependencies.yaml | 12 ++++++++++-- 10 files changed, 22 insertions(+), 14 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index c7d0d8b659..af56842a11 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -31,7 +31,6 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 0969780718..44739ffd6a 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -31,7 +31,6 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml index 7461806ff0..a0448933a2 100644 --- a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml @@ -31,7 +31,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 
-- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - nccl>=2.19 diff --git a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml index f1b7bb44c2..5fda0470d6 100644 --- a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml @@ -33,7 +33,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - librmm==25.12.*,>=0.0.0a0 - matplotlib-base>=3.9 - mkl-devel=2023 diff --git a/conda/environments/go_cuda-129_arch-aarch64.yaml b/conda/environments/go_cuda-129_arch-aarch64.yaml index 45e8f94697..b8bf557877 100644 --- a/conda/environments/go_cuda-129_arch-aarch64.yaml +++ b/conda/environments/go_cuda-129_arch-aarch64.yaml @@ -25,7 +25,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git a/conda/environments/go_cuda-129_arch-x86_64.yaml b/conda/environments/go_cuda-129_arch-x86_64.yaml index ce137edfce..adc12d644b 100644 --- a/conda/environments/go_cuda-129_arch-x86_64.yaml +++ b/conda/environments/go_cuda-129_arch-x86_64.yaml @@ -25,7 +25,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-129_arch-aarch64.yaml b/conda/environments/rust_cuda-129_arch-aarch64.yaml index 216ea42da4..28d7701d68 100644 --- a/conda/environments/rust_cuda-129_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-129_arch-aarch64.yaml @@ -22,7 +22,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/environments/rust_cuda-129_arch-x86_64.yaml b/conda/environments/rust_cuda-129_arch-x86_64.yaml index e9b8726a47..a21932185b 100644 --- 
a/conda/environments/rust_cuda-129_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-129_arch-x86_64.yaml @@ -22,7 +22,6 @@ dependencies: - libcusolver-dev - libcusparse-dev - libcuvs==25.12.*,>=0.0.0a0 -- libnvjitlink-dev - libraft==25.12.*,>=0.0.0a0 - make - nccl>=2.19 diff --git a/conda/recipes/libcuvs/recipe.yaml b/conda/recipes/libcuvs/recipe.yaml index 4516c4191c..54dd46d980 100644 --- a/conda/recipes/libcuvs/recipe.yaml +++ b/conda/recipes/libcuvs/recipe.yaml @@ -58,7 +58,9 @@ cache: - ninja - ${{ stdlib("c") }} host: - - libnvjitlink-dev + - if: cuda_major == "13" + then: + - libnvjitlink-dev - librmm =${{ minor_version }} - libraft-headers =${{ minor_version }} - nccl ${{ nccl_version }} @@ -107,7 +109,9 @@ outputs: - libcurand-dev - libcusolver-dev - libcusparse-dev - - libnvjitlink-dev + - if: cuda_major == "13" + then: + - libnvjitlink-dev run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - libraft-headers =${{ minor_version }} @@ -117,7 +121,9 @@ outputs: - libcurand - libcusolver - libcusparse - - libnvjitlink + - if: cuda_major == "13" + then: + - libnvjitlink ignore_run_exports: by_name: - cuda-cudart @@ -128,7 +134,9 @@ outputs: - libcurand - libcusolver - libcusparse - - libnvjitlink + - if: cuda_major == "13" + then: + - libnvjitlink - librmm - mkl - nccl diff --git a/dependencies.yaml b/dependencies.yaml index 77c987b8de..d878af0174 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -321,7 +321,16 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev - - libnvjitlink-dev + specific: + - output_types: conda + matrices: + - matrix: + cuda: "13.*" + packages: + - libnvjitlink-dev + - matrix: + cuda: "12.*" + packages: cuda_wheels: specific: - output_types: [requirements, pyproject] @@ -331,7 +340,6 @@ dependencies: use_cuda_wheels: "true" packages: - cuda-toolkit[cublas,curand,cusolver,cusparse]==12.* - - nvidia-nvjitlink-cu12 - matrix: cuda: "13.*" use_cuda_wheels: "true" From 
92a27d49093923d71e078251fbab7d0ff036c6af Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 24 Oct 2025 21:11:56 +0000 Subject: [PATCH 028/158] address review --- cpp/CMakeLists.txt | 117 ++++++++---------- .../{ => detail}/generate_header.cmake | 3 - cpp/cmake/modules/embed_fatbins.cmake | 4 +- .../generate_interleaved_scan_kernels.cmake | 103 --------------- .../modules/generate_jit_lto_kernels.cmake | 58 +++++++++ .../ivf_flat}/interleaved_scan_tags.hpp | 0 .../ivf_flat_interleaved_scan_jit.cuh | 2 +- .../jit_lto_kernels/generate_kernels.py | 62 +++++++++- 8 files changed, 170 insertions(+), 179 deletions(-) rename cpp/cmake/modules/{ => detail}/generate_header.cmake (95%) delete mode 100644 cpp/cmake/modules/generate_interleaved_scan_kernels.cmake create mode 100644 cpp/cmake/modules/generate_jit_lto_kernels.cmake rename cpp/{src/neighbors/ivf_flat/jit_lto_kernels => include/cuvs/detail/jit_lto/ivf_flat}/interleaved_scan_tags.hpp (100%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 404346f44f..3cbb6511f6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -211,22 +211,6 @@ if(BUILD_CAGRA_HNSWLIB) include(cmake/thirdparty/get_hnswlib.cmake) endif() -set(JIT_LTO_TARGET_ARCHITECTURE "") -set(JIT_LTO_COMPILATION OFF) -if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) - set(JIT_LTO_TARGET_ARCHITECTURE "75-real") - set(JIT_LTO_COMPILATION ON) -endif() - -if(JIT_LTO_COMPILATION) - # this is needed to embed fatbins to JIT at runtime - include(cmake/modules/embed_fatbins.cmake) - - # Generate interleaved scan kernel files at build time - include(cmake/modules/generate_interleaved_scan_kernels.cmake) - generate_interleaved_scan_kernels() -endif() - # ################################################################################################## # * cuvs --------------------------------------------------------------------- if(NOT BUILD_CPU_ONLY) @@ -351,9 +335,30 @@ if(NOT BUILD_CPU_ONLY) ) endif() - set(JIT_LTO_FILES 
src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/AlgorithmPlanner.cu - src/detail/jit_lto/FragmentDatabase.cu src/detail/jit_lto/FragmentEntry.cu - ) + set(JIT_LTO_TARGET_ARCHITECTURE "") + set(JIT_LTO_COMPILATION OFF) + set(JIT_LTO_FILES "") + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + set(JIT_LTO_TARGET_ARCHITECTURE "75-real") + set(JIT_LTO_COMPILATION ON) + endif() + + if(JIT_LTO_COMPILATION) + # this is needed to embed fatbins to JIT at runtime + include(cmake/modules/embed_fatbins.cmake) + + # Generate interleaved scan kernel files at build time + include(cmake/modules/generate_jit_lto_kernels.cmake) + generate_jit_lto_kernels( + "interleaved_scan" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py" + ) + + set(JIT_LTO_FILES + src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/AlgorithmPlanner.cu + src/detail/jit_lto/FragmentDatabase.cu src/detail/jit_lto/FragmentEntry.cu + ) + endif() add_library( cuvs_objs OBJECT @@ -587,31 +592,32 @@ if(NOT BUILD_CPU_ONLY) if(JIT_LTO_COMPILATION) add_library( - jit_lto_fatbins OBJECT ${INTERLEAVED_SCAN_KERNEL_FILES} ${METRIC_DEVICE_FUNCTION_FILES} - ${FILTER_DEVICE_FUNCTION_FILES} ${POST_LAMBDA_DEVICE_FUNCTION_FILES} + cuvs_jit_lto_fatbins OBJECT + ${INTERLEAVED_SCAN_KERNEL_FILES} ${METRIC_DEVICE_FUNCTION_FILES} + ${FILTER_DEVICE_FUNCTION_FILES} ${POST_LAMBDA_DEVICE_FUNCTION_FILES} ) # Make sure the kernels are generated before we try to build them - add_dependencies(jit_lto_fatbins ${INTERLEAVED_SCAN_KERNELS_TARGET}) + add_dependencies(cuvs_jit_lto_fatbins ${INTERLEAVED_SCAN_KERNELS_TARGET}) - target_compile_definitions(jit_lto_fatbins PRIVATE BUILD_KERNEL) + target_compile_definitions(cuvs_jit_lto_fatbins PRIVATE BUILD_KERNEL) target_include_directories( - jit_lto_fatbins + cuvs_jit_lto_fatbins PRIVATE "$" "$" "$" ) - target_compile_options(jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size) target_compile_options( - 
jit_lto_fatbins PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" + cuvs_jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size + ) + target_compile_options( + cuvs_jit_lto_fatbins PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" ) set_target_properties( - jit_lto_fatbins + cuvs_jit_lto_fatbins PROPERTIES CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON CUDA_SEPARABLE_COMPILATION ON @@ -619,31 +625,7 @@ if(NOT BUILD_CPU_ONLY) POSITION_INDEPENDENT_CODE ON INTERPROCEDURAL_OPTIMIZATION ON ) - target_link_libraries(jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) - - add_library(jit_lto_fatbins_as_cpp_sources STATIC) - - # Set PIC for the static library since it will be linked into a shared library - set_target_properties( - jit_lto_fatbins_as_cpp_sources - PROPERTIES CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - ) - - # Make sure the kernels are generated before embedding fatbins - add_dependencies(jit_lto_fatbins_as_cpp_sources ${INTERLEAVED_SCAN_KERNELS_TARGET}) - - target_include_directories( - jit_lto_fatbins_as_cpp_sources - PRIVATE "$" - "$" - "$" - ) - - embed_fatbins(jit_lto_fatbins_as_cpp_sources jit_lto_fatbins) + target_link_libraries(cuvs_jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) endif() # Endian detection @@ -719,14 +701,15 @@ if(NOT BUILD_CPU_ONLY) $> $> $<$:CUDA::nvtx3> - PRIVATE - nvidia::cutlass::cutlass - $ - cuvs-cagra-search - $<$:$> - $<$:CUDA::nvJitLink> + PRIVATE nvidia::cutlass::cutlass $ + cuvs-cagra-search $<$:CUDA::nvJitLink> ) + if(JIT_LTO_COMPILATION) + add_dependencies(cuvs ${INTERLEAVED_SCAN_KERNELS_TARGET}) + embed_fatbins(cuvs cuvs_jit_lto_fatbins) + endif() + # ensure CUDA symbols aren't relocated to the middle of the debug build binaries file( WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld" @@ -783,12 +766,14 @@ 
SECTIONS $> $> $<$:CUDA::nvtx3> - PRIVATE - nvidia::cutlass::cutlass - $ - $<$:$> - $<$:CUDA::nvJitLink> + PRIVATE nvidia::cutlass::cutlass $ + $<$:CUDA::nvJitLink> ) + + if(JIT_LTO_COMPILATION) + add_dependencies(cuvs_static ${INTERLEAVED_SCAN_KERNELS_TARGET}) + embed_fatbins(cuvs_static cuvs_jit_lto_fatbins) + endif() endif() # ################################################################################################ diff --git a/cpp/cmake/modules/generate_header.cmake b/cpp/cmake/modules/detail/generate_header.cmake similarity index 95% rename from cpp/cmake/modules/generate_header.cmake rename to cpp/cmake/modules/detail/generate_header.cmake index 83c48cb086..86851e8f47 100644 --- a/cpp/cmake/modules/generate_header.cmake +++ b/cpp/cmake/modules/detail/generate_header.cmake @@ -18,9 +18,6 @@ if(DEFINED OBJECTS_RESPONSE_FILE) string(STRIP "${objects_content}" objects_content) # Split by newlines since we joined with \n in the CMake file string(REPLACE "\n" ";" objects_list "${objects_content}") -else() - # Fallback to direct objects (for backward compatibility) - set(objects_list "${OBJECTS}") endif() # Create output directory if it doesn't exist diff --git a/cpp/cmake/modules/embed_fatbins.cmake b/cpp/cmake/modules/embed_fatbins.cmake index d16421b626..272c1f4a70 100644 --- a/cpp/cmake/modules/embed_fatbins.cmake +++ b/cpp/cmake/modules/embed_fatbins.cmake @@ -23,7 +23,7 @@ function(embed_fatbins library_name kernel_target) set(output_dir ${CMAKE_CURRENT_BINARY_DIR}/${library_name}) # Create a response file to avoid "argument list too long" errors - set(objects_response_file ${CMAKE_CURRENT_BINARY_DIR}/${library_name}_objects.rsp) + set(objects_response_file ${CMAKE_CURRENT_BINARY_DIR}/embed_fatbins/${library_name}_objects.rsp) # Write the objects list to a response file using file(GENERATE) which handles generator # expressions @@ -40,7 +40,7 @@ function(embed_fatbins library_name kernel_target) ${CMAKE_COMMAND} "-DBIN_TO_C_COMMAND=${bin_to_c}" 
"-DOBJECTS_RESPONSE_FILE=${objects_response_file}" "-DOUTPUT_DIR=${output_dir}" "-DSTAMP_FILE=${output_dir}/headers_generated.stamp" -P - ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/generate_header.cmake + ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/detail/generate_header.cmake VERBATIM DEPENDS "${objects_response_file}" $ COMMENT "Converting FATBIN kernels to individual C++ headers" diff --git a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake b/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake deleted file mode 100644 index 2cc76f8f28..0000000000 --- a/cpp/cmake/modules/generate_interleaved_scan_kernels.cmake +++ /dev/null @@ -1,103 +0,0 @@ -# ============================================================================= -# Copyright (c) 2025, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. 
-# ============================================================================= - -# Generate interleaved scan kernel files at build time -function(generate_interleaved_scan_kernels) - find_package(Python3 REQUIRED COMPONENTS Interpreter) - - set(GENERATOR_SCRIPT - ${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py - ) - set(OUTPUT_BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated_kernels) - set(CMAKE_LIST_FILE ${OUTPUT_BASE_DIR}/interleaved_scan.cmake) - - # Generate the kernels at build time - add_custom_command( - OUTPUT ${CMAKE_LIST_FILE} - COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} ${OUTPUT_BASE_DIR} - DEPENDS ${GENERATOR_SCRIPT} - COMMENT "Generating interleaved scan kernel files..." - VERBATIM - ) - - # Create a custom target that depends on the generated CMake file - add_custom_target(generate_interleaved_scan_kernels_target DEPENDS ${CMAKE_LIST_FILE}) - - # Include the generated CMake list file Only generate if the CMake list file doesn't exist - if(NOT EXISTS ${CMAKE_LIST_FILE}) - message(VERBOSE "Generating interleaved scan kernels for the first time...") - execute_process( - COMMAND ${Python3_EXECUTABLE} ${GENERATOR_SCRIPT} ${OUTPUT_BASE_DIR} - RESULT_VARIABLE GENERATION_RESULT - OUTPUT_VARIABLE GENERATION_OUTPUT - ERROR_VARIABLE GENERATION_ERROR - ) - - if(NOT GENERATION_RESULT EQUAL 0) - message( - FATAL_ERROR - "Failed to generate kernel files during configuration\nOutput: ${GENERATION_OUTPUT}\nError: ${GENERATION_ERROR}" - ) - endif() - endif() - - # Include the generated CMake file - include(${CMAKE_LIST_FILE}) - - # Prepend the binary directory path to all kernel files - set(FULL_PATH_KERNEL_FILES) - foreach(kernel_file ${INTERLEAVED_SCAN_KERNEL_FILES}) - list(APPEND FULL_PATH_KERNEL_FILES ${CMAKE_CURRENT_BINARY_DIR}/${kernel_file}) - endforeach() - - # Prepend the binary directory path to all metric device function files - set(FULL_PATH_METRIC_FILES) - foreach(metric_file ${METRIC_DEVICE_FUNCTION_FILES}) - 
list(APPEND FULL_PATH_METRIC_FILES ${CMAKE_CURRENT_BINARY_DIR}/${metric_file}) - endforeach() - - # Prepend the binary directory path to all filter device function files - set(FULL_PATH_FILTER_FILES) - foreach(filter_file ${FILTER_DEVICE_FUNCTION_FILES}) - list(APPEND FULL_PATH_FILTER_FILES ${CMAKE_CURRENT_BINARY_DIR}/${filter_file}) - endforeach() - - # Prepend the binary directory path to all post lambda device function files - set(FULL_PATH_POST_LAMBDA_FILES) - foreach(post_lambda_file ${POST_LAMBDA_DEVICE_FUNCTION_FILES}) - list(APPEND FULL_PATH_POST_LAMBDA_FILES ${CMAKE_CURRENT_BINARY_DIR}/${post_lambda_file}) - endforeach() - - # Return the lists to parent scope - set(INTERLEAVED_SCAN_KERNEL_FILES - ${FULL_PATH_KERNEL_FILES} - PARENT_SCOPE - ) - set(METRIC_DEVICE_FUNCTION_FILES - ${FULL_PATH_METRIC_FILES} - PARENT_SCOPE - ) - set(FILTER_DEVICE_FUNCTION_FILES - ${FULL_PATH_FILTER_FILES} - PARENT_SCOPE - ) - set(POST_LAMBDA_DEVICE_FUNCTION_FILES - ${FULL_PATH_POST_LAMBDA_FILES} - PARENT_SCOPE - ) - set(INTERLEAVED_SCAN_KERNELS_TARGET - generate_interleaved_scan_kernels_target - PARENT_SCOPE - ) -endfunction() diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake new file mode 100644 index 0000000000..e2ddbfb3bf --- /dev/null +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -0,0 +1,58 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Generate JIT LTO kernel files at build time using a Python generator script Arguments: kernel_name +# - Name of the kernel type (e.g., "interleaved_scan") generator_script - Path to the Python script +# that generates the kernels +function(generate_jit_lto_kernels kernel_name generator_script) + find_package(Python3 REQUIRED COMPONENTS Interpreter) + + set(OUTPUT_BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated_kernels) + set(GENERATED_CMAKE_FILE ${OUTPUT_BASE_DIR}/${kernel_name}.cmake) + + # Generate the kernels at build time + add_custom_command( + OUTPUT ${GENERATED_CMAKE_FILE} + COMMAND ${Python3_EXECUTABLE} ${generator_script} ${OUTPUT_BASE_DIR} ${kernel_name} + DEPENDS ${generator_script} + COMMENT "Generating ${kernel_name} kernel files..." + VERBATIM + ) + + # Create a custom target that depends on the generated CMake file Use a unique target name based + # on the kernel name + set(TARGET_NAME "generate_${kernel_name}_kernels_target") + add_custom_target(${TARGET_NAME} DEPENDS ${GENERATED_CMAKE_FILE}) + + # Only generate if the CMake file doesn't exist + if(NOT EXISTS ${GENERATED_CMAKE_FILE}) + message(VERBOSE "Generating ${kernel_name} kernels for the first time...") + execute_process( + COMMAND ${Python3_EXECUTABLE} ${generator_script} ${OUTPUT_BASE_DIR} ${kernel_name} + RESULT_VARIABLE GENERATION_RESULT + OUTPUT_VARIABLE GENERATION_OUTPUT + ERROR_VARIABLE GENERATION_ERROR + ) + + if(NOT GENERATION_RESULT EQUAL 0) + message( + FATAL_ERROR + "Failed to generate kernel files during configuration\nOutput: ${GENERATION_OUTPUT}\nError: ${GENERATION_ERROR}" + ) + endif() + endif() + + # Include the generated CMake file The generated file handles setting variables to PARENT_SCOPE + include(${GENERATED_CMAKE_FILE}) +endfunction() diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp b/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp similarity index 100% rename from cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_tags.hpp rename to cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index e50ca51254..1d0888c524 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -18,8 +18,8 @@ #include "../ivf_common.cuh" #include "jit_lto_kernels/interleaved_scan_planner.hpp" -#include "jit_lto_kernels/interleaved_scan_tags.hpp" #include +#include #include #include diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py index a480e6ec4e..cc7c8597ca 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py @@ -141,7 +141,7 @@ def generate_cuda_file_content(params): #else #include -#include +#include #include "interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params['type_abbrev']}_{params['acc_abbrev']}_{params['idx_abbrev']}.h" using namespace cuvs::neighbors::ivf_flat::detail; @@ -203,7 +203,7 @@ def generate_metric_device_function_content(metric_name, veclen, data_type, acc_ #else #include -#include +#include #include "metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}.h" using namespace cuvs::neighbors::ivf_flat::detail; @@ -408,6 +408,10 @@ def main(): # Output directory - use CMAKE_CURRENT_BINARY_DIR if provided, otherwise use source dir output_base_dir = Path(sys.argv[1]).absolute() if len(sys.argv) > 1 else script_dir + + # Kernel name - use provided name if available, otherwise default to "interleaved_scan" + 
kernel_name = sys.argv[2] if len(sys.argv) > 2 else "interleaved_scan" + output_dir = output_base_dir / 'interleaved_scan_kernels' output_dir.mkdir(parents=True, exist_ok=True) @@ -435,11 +439,13 @@ def main(): # Generate post lambda device function files post_lambda_files = generate_post_lambda_device_functions(output_base_dir) + # Generate CMake file listing all generated files - cmake_file = output_base_dir / 'interleaved_scan.cmake' + cmake_file = output_base_dir / f'{kernel_name}.cmake' cmake_content = "# Auto-generated file listing all kernel and device function files\n\n" + # Set relative path lists cmake_content += "set(INTERLEAVED_SCAN_KERNEL_FILES\n" for filename in sorted(generated_files): cmake_content += f" generated_kernels/interleaved_scan_kernels/{filename}\n" @@ -458,7 +464,55 @@ def main(): cmake_content += "set(POST_LAMBDA_DEVICE_FUNCTION_FILES\n" for filename in sorted(post_lambda_files): cmake_content += f" generated_kernels/post_lambda_device_functions/{filename}\n" - cmake_content += ")\n" + cmake_content += ")\n\n" + + # Add logic to prepend CMAKE_CURRENT_BINARY_DIR and set variables to PARENT_SCOPE + cmake_content += f"""# Prepend the binary directory path to all kernel files +set(FULL_PATH_KERNEL_FILES) +foreach(kernel_file ${{INTERLEAVED_SCAN_KERNEL_FILES}}) + list(APPEND FULL_PATH_KERNEL_FILES ${{CMAKE_CURRENT_BINARY_DIR}}/${{kernel_file}}) +endforeach() + +# Prepend the binary directory path to all metric device function files +set(FULL_PATH_METRIC_FILES) +foreach(metric_file ${{METRIC_DEVICE_FUNCTION_FILES}}) + list(APPEND FULL_PATH_METRIC_FILES ${{CMAKE_CURRENT_BINARY_DIR}}/${{metric_file}}) +endforeach() + +# Prepend the binary directory path to all filter device function files +set(FULL_PATH_FILTER_FILES) +foreach(filter_file ${{FILTER_DEVICE_FUNCTION_FILES}}) + list(APPEND FULL_PATH_FILTER_FILES ${{CMAKE_CURRENT_BINARY_DIR}}/${{filter_file}}) +endforeach() + +# Prepend the binary directory path to all post lambda device function 
files +set(FULL_PATH_POST_LAMBDA_FILES) +foreach(post_lambda_file ${{POST_LAMBDA_DEVICE_FUNCTION_FILES}}) + list(APPEND FULL_PATH_POST_LAMBDA_FILES ${{CMAKE_CURRENT_BINARY_DIR}}/${{post_lambda_file}}) +endforeach() + +# Return the lists to parent scope +set(INTERLEAVED_SCAN_KERNEL_FILES + ${{FULL_PATH_KERNEL_FILES}} + PARENT_SCOPE +) +set(METRIC_DEVICE_FUNCTION_FILES + ${{FULL_PATH_METRIC_FILES}} + PARENT_SCOPE +) +set(FILTER_DEVICE_FUNCTION_FILES + ${{FULL_PATH_FILTER_FILES}} + PARENT_SCOPE +) +set(POST_LAMBDA_DEVICE_FUNCTION_FILES + ${{FULL_PATH_POST_LAMBDA_FILES}} + PARENT_SCOPE +) +set(INTERLEAVED_SCAN_KERNELS_TARGET + generate_{kernel_name}_kernels_target + PARENT_SCOPE +) +""" # Only write if content has changed if not cmake_file.exists() or cmake_file.read_text() != cmake_content: From 67579f45f692a5aa4b1ced08dc6461a6328176bb Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 27 Oct 2025 18:26:21 +0000 Subject: [PATCH 029/158] add include guard --- cpp/cmake/modules/embed_fatbins.cmake | 2 ++ cpp/cmake/modules/generate_jit_lto_kernels.cmake | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cpp/cmake/modules/embed_fatbins.cmake b/cpp/cmake/modules/embed_fatbins.cmake index 272c1f4a70..09f9d4ea0d 100644 --- a/cpp/cmake/modules/embed_fatbins.cmake +++ b/cpp/cmake/modules/embed_fatbins.cmake @@ -12,6 +12,8 @@ # the License. # ============================================================================= +include_guard() + function(embed_fatbins library_name kernel_target) find_package(CUDAToolkit REQUIRED) find_program( diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index e2ddbfb3bf..6b895a8180 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -12,6 +12,8 @@ # the License. 
# ============================================================================= +include_guard() + # Generate JIT LTO kernel files at build time using a Python generator script Arguments: kernel_name # - Name of the kernel type (e.g., "interleaved_scan") generator_script - Path to the Python script # that generates the kernels From 7ad8774b72fc3012cdc9543062253788e516da64 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 27 Oct 2025 18:29:47 +0000 Subject: [PATCH 030/158] add and remove couple of comments --- cpp/include/cuvs/detail/jit_lto/FragmentEntry.h | 1 - cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h index 3092180444..5a65950a13 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h @@ -30,7 +30,6 @@ struct FragmentEntry { virtual bool add_to(nvJitLinkHandle& handle) const = 0; - // std::size_t compute_arg_count = 0; //optimization for equality checks std::string compute_key{}; }; diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 1561af0e52..bfbfc9de4a 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -124,12 +124,13 @@ std::shared_ptr AlgorithmPlanner::build() check_nvjitlink_result(handle, result); // cubin is linked, so now load it + // NOTE: cudaLibrary_t does not need to be freed explicitly cudaLibrary_t library; RAFT_CUDA_TRY( cudaLibraryLoadData(&library, cubin.get(), nullptr, nullptr, 0, nullptr, nullptr, 0)); constexpr unsigned int count = 1; - // Still need to cache/compute the mangled name + // NOTE: cudaKernel_t does not need to be freed explicitly std::unique_ptr kernels{new cudaKernel_t[count]}; RAFT_CUDA_TRY(cudaLibraryEnumerateKernels(kernels.get(), count, library)); From 
ab35ef3750778e473dcdd4a1dee331b4b22f989a Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 27 Oct 2025 18:49:05 +0000 Subject: [PATCH 031/158] delete readme --- .../ivf_flat/jit_lto_kernels/README.md | 73 ------------------- 1 file changed, 73 deletions(-) delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md deleted file mode 100644 index 29434c49be..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/README.md +++ /dev/null @@ -1,73 +0,0 @@ -# Interleaved Scan Kernel Generation - -This directory contains the tools for creating CUDA kernel instantiations for the interleaved scan functionality. The kernel files are **generated at build time** and are not checked into version control. - -## Files - -- `interleaved_scan_kernels.txt` - List of kernel function signatures (1280 entries) -- `generate_kernels.py` - Python script to generate .cu files from the kernel list -- `.gitignore` - Ignores generated `.cu` files - -## Build-Time Generation - -The kernel files are automatically generated during the CMake configuration phase. The process is handled by: - -1. **CMake Module**: `cpp/cmake/modules/generate_interleaved_scan_kernels.cmake` -2. **Generator Script**: `generate_kernels.py` (this directory) - -### How It Works - -During CMake configuration: -1. Python script is executed to generate 1280 `.cu` files -2. Files are placed in the build directory (not source directory) -3. Generated CMake list file is included automatically -4. Build targets depend on the generation step - -### Manual Regeneration (Optional) - -If you need to manually generate files (e.g., for inspection): - -```bash -cd /path/to/cuvs/cpp/src/neighbors/ivf_flat/jit_lto_kernels -python3 generate_kernels.py -``` - -**Note**: Manual generation is not required for normal builds. 
- -## Template Parameters - -Each kernel is parameterized by 10 template arguments: - -1. **kBlockSize** (0, 1, 2, 4, 8, 16, 32, 64, 128, 256) -2. **VecLen** (1, 4, 8, 16) -3. **kManageLocalTopK** (true, false) -4. **kPrecompBaseDiff** (true, false) -5. **T** (float, __half, unsigned char, signed char) -6. **AccT** (float, __half, unsigned int, int) -7. **IdxT** (long) -8. **FilterT** (none_sample_filter → 'n', bitset_filter → 'b') -9. **DistanceT** (inner_prod_dist → 'inner_N', euclidean_dist → 'euclidean_N') -10. **FinalLambda** (identity_op → 'id', sqrt_op → 'sqrt', compose_op → 'compose') - -## Filename Convention - -Files follow the pattern: -``` -interleaved_scan_kernel__________.cu -``` - -Example: -``` -Template: <0, 1, false, false, float, float, long, none_sample_filter, inner_prod_dist<1>, identity_op> -Filename: interleaved_scan_kernel_0_1_false_false_f_f_l_n_inner_1_id.cu -``` - -## File Structure - -Each generated `.cu` file contains: - -1. **Apache 2.0 License Header** -2. **Include**: `#include "../ivf_flat_interleaved_scan_jit.cuh"` -3. **Conditional compilation**: - - `#ifdef BUILD_KERNEL`: Template instantiation - - `#else`: Registration function for JIT/LTO system From cdd4c85a82f358b08a73a532f7bc10b302b217cb Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 27 Oct 2025 18:50:40 +0000 Subject: [PATCH 032/158] increase warmup time --- python/cuvs_bench/cuvs_bench/run/runners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuvs_bench/cuvs_bench/run/runners.py b/python/cuvs_bench/cuvs_bench/run/runners.py index 522636e6c7..f31121df1c 100644 --- a/python/cuvs_bench/cuvs_bench/run/runners.py +++ b/python/cuvs_bench/cuvs_bench/run/runners.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -148,7 +148,7 @@ def cuvs_bench_cpp( "--benchmark_counters_tabular=true", f"--override_kv=k:{k}", f"--override_kv=n_queries:{batch_size}", - "--benchmark_min_warmup_time=1", + "--benchmark_min_warmup_time=4", "--benchmark_out_format=json", f"--mode={mode}", f"--benchmark_out={os.path.join(search_folder, search_file)}", From c1eff9fe7c236bb01ff1d503955b03850742ddb2 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 27 Oct 2025 19:27:38 +0000 Subject: [PATCH 033/158] use new copyright --- .../cuvs/detail/jit_lto/AlgorithmLauncher.h | 15 +--- .../cuvs/detail/jit_lto/AlgorithmPlanner.h | 15 +--- .../cuvs/detail/jit_lto/FragmentDatabase.h | 15 +--- .../cuvs/detail/jit_lto/FragmentEntry.h | 15 +--- .../cuvs/detail/jit_lto/MakeFragmentKey.h | 16 +--- .../detail/jit_lto/RegisterKernelFragment.h | 16 +--- .../ivf_flat/interleaved_scan_tags.hpp | 15 +--- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 15 +--- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 15 +--- cpp/src/detail/jit_lto/FragmentDatabase.cu | 15 +--- cpp/src/detail/jit_lto/FragmentEntry.cu | 15 +--- .../ivf_flat_interleaved_scan_jit.cuh | 15 +--- .../ivf_flat_interleaved_scan_kernel.cuh | 15 +--- .../jit_lto_kernels/filter_bitset.cuh | 15 +--- .../ivf_flat/jit_lto_kernels/filter_none.cuh | 15 +--- .../jit_lto_kernels/generate_kernels.py | 73 +++---------------- .../interleaved_scan_planner.hpp | 15 +--- .../jit_lto_kernels/metric_euclidean_dist.cuh | 15 +--- .../jit_lto_kernels/metric_inner_product.cuh | 15 +--- .../ivf_flat/jit_lto_kernels/post_compose.cuh | 15 +--- .../jit_lto_kernels/post_identity.cuh | 15 +--- .../ivf_flat/jit_lto_kernels/post_sqrt.cuh | 15 +--- 22 files changed, 52 insertions(+), 338 deletions(-) diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h index ad01e28510..bcc50d8207 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h @@ -1,17 +1,6 @@ /* - * 
Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h index c8e23122e8..94cf419de6 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h index 3e4b6703e2..007726532e 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h index 5a65950a13..39eca2f187 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h index d7f37e012d..8a0e54f699 100644 --- a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h +++ b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h @@ -1,18 +1,6 @@ - /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h b/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h index 560d09dc1d..bbf3fe0ca3 100644 --- a/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h +++ b/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h @@ -1,18 +1,6 @@ - /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp b/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp index 2fc670a8e9..b201f7c044 100644 --- a/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp +++ b/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 56b854263d..5b786e95cf 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #include diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index bfbfc9de4a..4db3a09140 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #include diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index 493405a170..3ab97145d4 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #include diff --git a/cpp/src/detail/jit_lto/FragmentEntry.cu b/cpp/src/detail/jit_lto/FragmentEntry.cu index 5924974faa..9d9349bdd0 100644 --- a/cpp/src/detail/jit_lto/FragmentEntry.cu +++ b/cpp/src/detail/jit_lto/FragmentEntry.cu @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #include diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index 1d0888c524..0ad344684b 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2022-2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh index 83f4984319..f7fc69e4ae 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh index 95ea61e194..7f171d5729 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh index 61dfc9de84..61af472469 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py index cc7c8597ca..1df3aee8ca 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py @@ -1,15 +1,6 @@ # ============================================================================= -# Copyright (c) 2025, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 # ============================================================================= #!/usr/bin/env python3 @@ -107,19 +98,8 @@ def generate_cuda_file_content(params): idx_type = params['idx_type'] content = f"""/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. Do not edit manually. @@ -172,19 +152,8 @@ def generate_metric_device_function_content(metric_name, veclen, data_type, acc_ header_file = 'neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh' content = f"""/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. Do not edit manually. @@ -230,19 +199,8 @@ def generate_filter_device_function_content(filter_name): header_file = 'neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh' content = f"""/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. Do not edit manually. @@ -286,19 +244,8 @@ def generate_post_lambda_device_function_content(post_lambda_name): header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh' content = f"""/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. Do not edit manually. diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp index e6695fb08f..82124f3409 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh index 8d6327ce1e..f197d3f218 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh index 09b701e7c4..c6600c526c 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh index ba9b60b238..6cbb19c3b2 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh index 7e17bf481d..7ea402a312 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh index 1cfd755445..ca23b2a6a5 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once From ece09b8df100fbff29435ddbace2fcbbf7a54eab Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 27 Oct 2025 19:40:53 +0000 Subject: [PATCH 034/158] new copyright --- cpp/cmake/modules/detail/generate_header.cmake | 15 ++++----------- cpp/cmake/modules/embed_fatbins.cmake | 15 ++++----------- cpp/cmake/modules/generate_jit_lto_kernels.cmake | 15 ++++----------- 3 files changed, 12 insertions(+), 33 deletions(-) diff --git a/cpp/cmake/modules/detail/generate_header.cmake b/cpp/cmake/modules/detail/generate_header.cmake index 86851e8f47..03c2680304 100644 --- a/cpp/cmake/modules/detail/generate_header.cmake +++ b/cpp/cmake/modules/detail/generate_header.cmake @@ -1,15 +1,8 @@ # ============================================================================= -# Copyright (c) 2025, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. +# cmake-format: off +# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. 
+# SPDX-License-Identifier: Apache-2.0 +# cmake-format: on # ============================================================================= # Read objects from response file to avoid argument length issues diff --git a/cpp/cmake/modules/embed_fatbins.cmake b/cpp/cmake/modules/embed_fatbins.cmake index 09f9d4ea0d..78064904fc 100644 --- a/cpp/cmake/modules/embed_fatbins.cmake +++ b/cpp/cmake/modules/embed_fatbins.cmake @@ -1,15 +1,8 @@ # ============================================================================= -# Copyright (c) 2025, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. +# cmake-format: off +# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# cmake-format: on # ============================================================================= include_guard() diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 6b895a8180..2d6011c008 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -1,15 +1,8 @@ # ============================================================================= -# Copyright (c) 2025, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. +# cmake-format: off +# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# cmake-format: on # ============================================================================= include_guard() From 4dacc6ef4a2410d6fc058a18875cba1e0772bf45 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 27 Oct 2025 19:55:19 +0000 Subject: [PATCH 035/158] remove one more straggling comment --- cpp/src/detail/jit_lto/FragmentDatabase.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index 3ab97145d4..0d8c1197de 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -21,7 +21,6 @@ bool FragmentDatabase::make_cache_entry(std::string const& name, std::string con FragmentDatabase& fragment_database() { - // Left to the reader to make this thread safe static FragmentDatabase database; return database; } From 1fd95cdf6902c9f343eaf73c4e5ab6eeca3c47d6 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 27 Oct 2025 19:57:41 +0000 Subject: [PATCH 036/158] use raft expects --- cpp/src/detail/jit_lto/FragmentDatabase.cu | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index 0d8c1197de..2b51630372 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -3,11 +3,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include - #include 
#include +#include + FragmentDatabase::FragmentDatabase() {} bool FragmentDatabase::make_cache_entry(std::string const& name, std::string const& params) @@ -29,10 +29,7 @@ FragmentEntry* FragmentDatabase::get_fragment(std::string const& key) { auto& db = fragment_database(); auto val = db.cache.find(key); - if (val == db.cache.end()) { - std::cout << "FragmentDatabase: Key not found" << std::endl; - return nullptr; - } + RAFT_EXPECTS(val != db.cache.end(), "FragmentDatabase: Key not found"); return val->second.get(); } From 78002c6b9f9678bf33916f86a71817b9655c0f60 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 12 Dec 2025 19:22:22 +0000 Subject: [PATCH 037/158] address review --- cpp/cmake/modules/embed_fatbins.cmake | 2 +- .../modules/generate_jit_lto_kernels.cmake | 2 +- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 23 ++------------- cpp/src/detail/jit_lto/FragmentEntry.cu | 24 ++------------- cpp/src/detail/jit_lto/nvjitlink_checker.hpp | 29 +++++++++++++++++++ 5 files changed, 35 insertions(+), 45 deletions(-) create mode 100644 cpp/src/detail/jit_lto/nvjitlink_checker.hpp diff --git a/cpp/cmake/modules/embed_fatbins.cmake b/cpp/cmake/modules/embed_fatbins.cmake index 78064904fc..5cfb74b011 100644 --- a/cpp/cmake/modules/embed_fatbins.cmake +++ b/cpp/cmake/modules/embed_fatbins.cmake @@ -5,7 +5,7 @@ # cmake-format: on # ============================================================================= -include_guard() +include_guard(GLOBAL) function(embed_fatbins library_name kernel_target) find_package(CUDAToolkit REQUIRED) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 2d6011c008..bef62d7267 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -5,7 +5,7 @@ # cmake-format: on # ============================================================================= -include_guard() +include_guard(GLOBAL) # Generate JIT LTO kernel 
files at build time using a Python generator script Arguments: kernel_name # - Name of the kernel type (e.g., "interleaved_scan") generator_script - Path to the Python script diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 4db3a09140..2d9e200405 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -3,8 +3,9 @@ * SPDX-License-Identifier: Apache-2.0 */ +#include "nvjitlink_checker.hpp" + #include -#include #include #include #include @@ -19,26 +20,6 @@ #include -namespace { -// We can make a better RAII wrapper around nvjitlinkhandle -void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) -{ - if (result != NVJITLINK_SUCCESS) { - std::cerr << "\n nvJITLink failed with error " << result << '\n'; - size_t log_size = 0; - result = nvJitLinkGetErrorLogSize(handle, &log_size); - if (result == NVJITLINK_SUCCESS && log_size > 0) { - std::unique_ptr log{new char[log_size]}; - result = nvJitLinkGetErrorLog(handle, log.get()); - if (result == NVJITLINK_SUCCESS) { - std::cerr << "AlgorithmPlanner nvJITLink error log: " << log.get() << '\n'; - } - } - exit(1); - } -} -} // namespace - void AlgorithmPlanner::add_entrypoint() { auto entrypoint_fragment = fragment_database().get_fragment(this->entrypoint); diff --git a/cpp/src/detail/jit_lto/FragmentEntry.cu b/cpp/src/detail/jit_lto/FragmentEntry.cu index 9d9349bdd0..2f2832b95f 100644 --- a/cpp/src/detail/jit_lto/FragmentEntry.cu +++ b/cpp/src/detail/jit_lto/FragmentEntry.cu @@ -3,32 +3,12 @@ * SPDX-License-Identifier: Apache-2.0 */ +#include "nvjitlink_checker.hpp" + #include -#include #include -namespace { - -// We can make a better RAII wrapper around nvjitlinkhandle -void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) -{ - if (result != NVJITLINK_SUCCESS) { - std::cerr << "\n nvJITLink failed with error " << result << '\n'; - size_t log_size = 0; - result = 
nvJitLinkGetErrorLogSize(handle, &log_size); - if (result == NVJITLINK_SUCCESS && log_size > 0) { - std::unique_ptr log{new char[log_size]}; - result = nvJitLinkGetErrorLog(handle, log.get()); - if (result == NVJITLINK_SUCCESS) { - std::cerr << "FragmentEntry nvJITLink error log: " << log.get() << '\n'; - } - } - exit(1); - } -} -} // namespace - FragmentEntry::FragmentEntry(std::string const& params) : compute_key(params) {} FatbinFragmentEntry::FatbinFragmentEntry(std::string const& params, diff --git a/cpp/src/detail/jit_lto/nvjitlink_checker.hpp b/cpp/src/detail/jit_lto/nvjitlink_checker.hpp new file mode 100644 index 0000000000..c0ae3f4b0e --- /dev/null +++ b/cpp/src/detail/jit_lto/nvjitlink_checker.hpp @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include + +namespace { +// We can make a better RAII wrapper around nvjitlinkhandle +void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) +{ + if (result != NVJITLINK_SUCCESS) { + RAFT_FAIL("nvJITLink failed with error " + std::to_string(result)); + size_t log_size = 0; + result = nvJitLinkGetErrorLogSize(handle, &log_size); + if (result == NVJITLINK_SUCCESS && log_size > 0) { + std::unique_ptr log{new char[log_size]}; + result = nvJitLinkGetErrorLog(handle, log.get()); + if (result == NVJITLINK_SUCCESS) { + RAFT_FAIL("AlgorithmPlanner nvJITLink error log: " + std::string(log.get())); + } + } + } +} +} // namespace From 9ad6a0bc2feeef0071a68f42db11980df7bf14e9 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 12 Dec 2025 19:36:57 +0000 Subject: [PATCH 038/158] pre-commit --- cpp/CMakeLists.txt | 3 +- .../jit_lto_kernels/generate_kernels.py | 196 +++++++++++------- 2 files changed, 121 insertions(+), 78 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f248a545fc..d2f4c07b4c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -766,8 
+766,7 @@ SECTIONS $ # needs to be public for DT_NEEDED $> # header only PRIVATE nvidia::cutlass::cutlass $ - $<$:CUDA::nvJitLink> - $<$:CUDA::nvtx3> + $<$:CUDA::nvJitLink> $<$:CUDA::nvtx3> ) if(JIT_LTO_COMPILATION) diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py index 1df3aee8ca..8afa1baa20 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py @@ -21,32 +21,32 @@ # Data type configurations: (data_type, acc_type, veclens, type_abbrev, acc_abbrev) # Each data type has veclen=1 and one optimized larger veclen DATA_TYPE_CONFIGS = [ - ('float', 'float', [1, 4], 'f', 'f'), - ('__half', '__half', [1, 8], 'h', 'h'), - ('uint8_t', 'uint32_t', [1, 16], 'uc', 'ui'), - ('int8_t', 'int32_t', [1, 16], 'sc', 'i'), + ("float", "float", [1, 4], "f", "f"), + ("__half", "__half", [1, 8], "h", "h"), + ("uint8_t", "uint32_t", [1, 16], "uc", "ui"), + ("int8_t", "int32_t", [1, 16], "sc", "i"), ] -IDX_TYPE = 'int64_t' -IDX_TYPE_ABBREV = 'l' +IDX_TYPE = "int64_t" +IDX_TYPE_ABBREV = "l" # Metric configurations for device functions METRIC_CONFIGS = [ - 'euclidean', - 'inner_prod', + "euclidean", + "inner_prod", ] # Filter configurations FILTER_CONFIGS = [ - 'filter_none', - 'filter_bitset', + "filter_none", + "filter_bitset", ] # Post lambda configurations POST_LAMBDA_CONFIGS = [ - 'post_identity', - 'post_sqrt', - 'post_compose', + "post_identity", + "post_sqrt", + "post_compose", ] @@ -54,48 +54,56 @@ def generate_kernel_combinations(): """Generate all valid kernel parameter combinations.""" kernels = [] - for data_type, acc_type, veclens, type_abbrev, acc_abbrev in DATA_TYPE_CONFIGS: + for ( + data_type, + acc_type, + veclens, + type_abbrev, + acc_abbrev, + ) in DATA_TYPE_CONFIGS: for capacity, veclen, ascending, compute_norm in itertools.product( CAPACITIES, veclens, ASCENDING_VALUES, COMPUTE_NORM_VALUES 
): - kernels.append({ - 'capacity': capacity, - 'veclen': veclen, - 'ascending': ascending, - 'compute_norm': compute_norm, - 'data_type': data_type, - 'acc_type': acc_type, - 'idx_type': IDX_TYPE, - 'type_abbrev': type_abbrev, - 'acc_abbrev': acc_abbrev, - 'idx_abbrev': IDX_TYPE_ABBREV, - }) + kernels.append( + { + "capacity": capacity, + "veclen": veclen, + "ascending": ascending, + "compute_norm": compute_norm, + "data_type": data_type, + "acc_type": acc_type, + "idx_type": IDX_TYPE, + "type_abbrev": type_abbrev, + "acc_abbrev": acc_abbrev, + "idx_abbrev": IDX_TYPE_ABBREV, + } + ) return kernels def generate_filename(params): """Generate filename from kernel parameters.""" - capacity = params['capacity'] - veclen = params['veclen'] - ascending = 'true' if params['ascending'] else 'false' - compute_norm = 'true' if params['compute_norm'] else 'false' - type_abbrev = params['type_abbrev'] - acc_abbrev = params['acc_abbrev'] - idx_abbrev = params['idx_abbrev'] + capacity = params["capacity"] + veclen = params["veclen"] + ascending = "true" if params["ascending"] else "false" + compute_norm = "true" if params["compute_norm"] else "false" + type_abbrev = params["type_abbrev"] + acc_abbrev = params["acc_abbrev"] + idx_abbrev = params["idx_abbrev"] return f"interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{type_abbrev}_{acc_abbrev}_{idx_abbrev}.cu" def generate_cuda_file_content(params): """Generate the content of a CUDA kernel instantiation file.""" - capacity = params['capacity'] - veclen = params['veclen'] - ascending = 'true' if params['ascending'] else 'false' - compute_norm = 'true' if params['compute_norm'] else 'false' - data_type = params['data_type'] - acc_type = params['acc_type'] - idx_type = params['idx_type'] + capacity = params["capacity"] + veclen = params["veclen"] + ascending = "true" if params["ascending"] else "false" + compute_norm = "true" if params["compute_norm"] else "false" + data_type = params["data_type"] + acc_type = 
params["acc_type"] + idx_type = params["idx_type"] content = f"""/* * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. @@ -122,18 +130,18 @@ def generate_cuda_file_content(params): #include #include -#include "interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params['type_abbrev']}_{params['acc_abbrev']}_{params['idx_abbrev']}.h" +#include "interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params["type_abbrev"]}_{params["acc_abbrev"]}_{params["idx_abbrev"]}.h" using namespace cuvs::neighbors::ivf_flat::detail; -__attribute__((__constructor__)) static void register_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params['type_abbrev']}_{params['acc_abbrev']}_{params['idx_abbrev']}() +__attribute__((__constructor__)) static void register_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params["type_abbrev"]}_{params["acc_abbrev"]}_{params["idx_abbrev"]}() {{ - registerAlgorithm( + registerAlgorithm( "interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}", - embedded_interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params['type_abbrev']}_{params['acc_abbrev']}_{params['idx_abbrev']}, - sizeof(embedded_interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params['type_abbrev']}_{params['acc_abbrev']}_{params['idx_abbrev']})); + embedded_interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params["type_abbrev"]}_{params["acc_abbrev"]}_{params["idx_abbrev"]}, + sizeof(embedded_interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params["type_abbrev"]}_{params["acc_abbrev"]}_{params["idx_abbrev"]})); }} #endif @@ -141,15 +149,31 @@ def generate_cuda_file_content(params): return content -def generate_metric_device_function_content(metric_name, veclen, data_type, acc_type): +def generate_metric_device_function_content( + metric_name, veclen, data_type, acc_type +): """Generate content for a metric device 
function file.""" - type_abbrev = {'float': 'f', '__half': 'h', 'uint8_t': 'uc', 'int8_t': 'sc'}[data_type] - acc_abbrev = {'float': 'f', '__half': 'h', 'uint32_t': 'ui', 'int32_t': 'i'}[acc_type] - - if metric_name == 'euclidean': - header_file = 'neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh' + type_abbrev = { + "float": "f", + "__half": "h", + "uint8_t": "uc", + "int8_t": "sc", + }[data_type] + acc_abbrev = { + "float": "f", + "__half": "h", + "uint32_t": "ui", + "int32_t": "i", + }[acc_type] + + if metric_name == "euclidean": + header_file = ( + "neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh" + ) else: # inner_prod - header_file = 'neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh' + header_file = ( + "neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh" + ) content = f"""/* * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. @@ -193,10 +217,10 @@ def generate_metric_device_function_content(metric_name, veclen, data_type, acc_ def generate_filter_device_function_content(filter_name): """Generate content for a filter device function file.""" - if filter_name == 'filter_none': - header_file = 'neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh' + if filter_name == "filter_none": + header_file = "neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh" else: # filter_bitset - header_file = 'neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh' + header_file = "neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh" content = f"""/* * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
@@ -236,12 +260,12 @@ def generate_filter_device_function_content(filter_name): def generate_post_lambda_device_function_content(post_lambda_name): """Generate content for a post lambda device function file.""" - if post_lambda_name == 'post_identity': - header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh' - elif post_lambda_name == 'post_sqrt': - header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh' + if post_lambda_name == "post_identity": + header_file = "neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh" + elif post_lambda_name == "post_sqrt": + header_file = "neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh" else: # post_compose - header_file = 'neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh' + header_file = "neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh" content = f"""/* * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. @@ -281,18 +305,26 @@ def generate_post_lambda_device_function_content(post_lambda_name): def generate_metric_device_functions(output_base_dir): """Generate all metric device function files.""" - metric_dir = output_base_dir / 'metric_device_functions' + metric_dir = output_base_dir / "metric_device_functions" metric_dir.mkdir(parents=True, exist_ok=True) metric_files = [] for metric_name in METRIC_CONFIGS: - for data_type, acc_type, veclens, type_abbrev, acc_abbrev in DATA_TYPE_CONFIGS: + for ( + data_type, + acc_type, + veclens, + type_abbrev, + acc_abbrev, + ) in DATA_TYPE_CONFIGS: for veclen in veclens: filename = f"metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}.cu" filepath = metric_dir / filename - content = generate_metric_device_function_content(metric_name, veclen, data_type, acc_type) + content = generate_metric_device_function_content( + metric_name, veclen, data_type, acc_type + ) # Only write if content has changed if not filepath.exists() or filepath.read_text() != content: @@ -305,7 +337,7 @@ def generate_metric_device_functions(output_base_dir): def 
generate_filter_device_functions(output_base_dir): """Generate all filter device function files.""" - filter_dir = output_base_dir / 'filter_device_functions' + filter_dir = output_base_dir / "filter_device_functions" filter_dir.mkdir(parents=True, exist_ok=True) filter_files = [] @@ -327,7 +359,7 @@ def generate_filter_device_functions(output_base_dir): def generate_post_lambda_device_functions(output_base_dir): """Generate all post lambda device function files.""" - post_lambda_dir = output_base_dir / 'post_lambda_device_functions' + post_lambda_dir = output_base_dir / "post_lambda_device_functions" post_lambda_dir.mkdir(parents=True, exist_ok=True) post_lambda_files = [] @@ -336,7 +368,9 @@ def generate_post_lambda_device_functions(output_base_dir): filename = f"{post_lambda_name}.cu" filepath = post_lambda_dir / filename - content = generate_post_lambda_device_function_content(post_lambda_name) + content = generate_post_lambda_device_function_content( + post_lambda_name + ) # Only write if content has changed if not filepath.exists() or filepath.read_text() != content: @@ -354,12 +388,14 @@ def main(): script_dir = Path(__file__).parent.absolute() # Output directory - use CMAKE_CURRENT_BINARY_DIR if provided, otherwise use source dir - output_base_dir = Path(sys.argv[1]).absolute() if len(sys.argv) > 1 else script_dir + output_base_dir = ( + Path(sys.argv[1]).absolute() if len(sys.argv) > 1 else script_dir + ) # Kernel name - use provided name if available, otherwise default to "interleaved_scan" kernel_name = sys.argv[2] if len(sys.argv) > 2 else "interleaved_scan" - output_dir = output_base_dir / 'interleaved_scan_kernels' + output_dir = output_base_dir / "interleaved_scan_kernels" output_dir.mkdir(parents=True, exist_ok=True) kernels = generate_kernel_combinations() @@ -388,29 +424,37 @@ def main(): post_lambda_files = generate_post_lambda_device_functions(output_base_dir) # Generate CMake file listing all generated files - cmake_file = output_base_dir / 
f'{kernel_name}.cmake' + cmake_file = output_base_dir / f"{kernel_name}.cmake" cmake_content = "# Auto-generated file listing all kernel and device function files\n\n" # Set relative path lists cmake_content += "set(INTERLEAVED_SCAN_KERNEL_FILES\n" for filename in sorted(generated_files): - cmake_content += f" generated_kernels/interleaved_scan_kernels/{filename}\n" + cmake_content += ( + f" generated_kernels/interleaved_scan_kernels/{filename}\n" + ) cmake_content += ")\n\n" cmake_content += "set(METRIC_DEVICE_FUNCTION_FILES\n" for filename in sorted(metric_files): - cmake_content += f" generated_kernels/metric_device_functions/{filename}\n" + cmake_content += ( + f" generated_kernels/metric_device_functions/{filename}\n" + ) cmake_content += ")\n\n" cmake_content += "set(FILTER_DEVICE_FUNCTION_FILES\n" for filename in sorted(filter_files): - cmake_content += f" generated_kernels/filter_device_functions/{filename}\n" + cmake_content += ( + f" generated_kernels/filter_device_functions/{filename}\n" + ) cmake_content += ")\n\n" cmake_content += "set(POST_LAMBDA_DEVICE_FUNCTION_FILES\n" for filename in sorted(post_lambda_files): - cmake_content += f" generated_kernels/post_lambda_device_functions/{filename}\n" + cmake_content += ( + f" generated_kernels/post_lambda_device_functions/{filename}\n" + ) cmake_content += ")\n\n" # Add logic to prepend CMAKE_CURRENT_BINARY_DIR and set variables to PARENT_SCOPE @@ -466,5 +510,5 @@ def main(): cmake_file.write_text(cmake_content) -if __name__ == '__main__': +if __name__ == "__main__": main() From bf4c4ad6383d601ccffb50da23735d9fc5f1d1f2 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 12 Dec 2025 21:30:07 +0000 Subject: [PATCH 039/158] address review --- conda/recipes/libcuvs/recipe.yaml | 18 ++++++++++++++++++ cpp/CMakeLists.txt | 18 ++++++++++++------ cpp/src/detail/jit_lto/nvjitlink_checker.hpp | 2 -- ...ivf_flat_interleaved_scan_explicit_inst.cuh | 2 +- .../ivf_flat/ivf_flat_interleaved_scan_jit.cuh | 18 
------------------ 5 files changed, 31 insertions(+), 27 deletions(-) diff --git a/conda/recipes/libcuvs/recipe.yaml b/conda/recipes/libcuvs/recipe.yaml index 974e85e7b8..c945e20df1 100644 --- a/conda/recipes/libcuvs/recipe.yaml +++ b/conda/recipes/libcuvs/recipe.yaml @@ -134,6 +134,9 @@ outputs: - libcurand - libcusolver - libcusparse + - if: cuda_major == "13" + then: + - libnvjitlink ignore_run_exports: by_name: - cuda-cudart @@ -147,6 +150,9 @@ outputs: - librmm - mkl - nccl + - if: cuda_major == "13" + then: + - libnvjitlink about: homepage: ${{ load_from_file("python/libcuvs/pyproject.toml").project.urls.Homepage }} license: ${{ load_from_file("python/libcuvs/pyproject.toml").project.license.text }} @@ -183,6 +189,9 @@ outputs: - libcurand-dev - libcusolver-dev - libcusparse-dev + - if: cuda_major == "13" + then: + - libnvjitlink-dev run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - ${{ pin_subpackage("libcuvs-headers", exact=True) }} @@ -194,6 +203,9 @@ outputs: - libcurand - libcusolver - libcusparse + - if: cuda_major == "13" + then: + - libnvjitlink ignore_run_exports: by_name: - cuda-cudart @@ -207,6 +219,9 @@ outputs: - librmm - mkl - nccl + - if: cuda_major == "13" + then: + - libnvjitlink about: homepage: ${{ load_from_file("python/libcuvs/pyproject.toml").project.urls.Homepage }} license: ${{ load_from_file("python/libcuvs/pyproject.toml").project.license.text }} @@ -241,6 +256,9 @@ outputs: - libcurand-dev - libcusolver-dev - libcusparse-dev + - if: cuda_major == "13" + then: + - libnvjitlink-dev run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - ${{ pin_subpackage("libcuvs-headers", exact=True) }} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d2f4c07b4c..8a3e1ed647 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -582,8 +582,10 @@ if(NOT BUILD_CPU_ONLY) ) target_compile_definitions( - cuvs_objs PRIVATE $<$:CUVS_BUILD_CAGRA_HNSWLIB> - $<$:NVTX_ENABLED> + cuvs_objs + 
PRIVATE $<$:CUVS_BUILD_CAGRA_HNSWLIB> + $<$:NVTX_ENABLED> + $<$:CUVS_ENABLE_JIT_LTO> ) target_link_libraries( @@ -691,8 +693,10 @@ if(NOT BUILD_CPU_ONLY) "$<$:${CUVS_CUDA_FLAGS}>" ) target_compile_definitions( - cuvs PUBLIC $<$:CUVS_BUILD_CAGRA_HNSWLIB> - $<$:NVTX_ENABLED> + cuvs + PUBLIC $<$:CUVS_BUILD_CAGRA_HNSWLIB> + $<$:NVTX_ENABLED> + $<$:CUVS_ENABLE_JIT_LTO> ) target_link_libraries( @@ -748,8 +752,10 @@ SECTIONS target_compile_options(cuvs_static PRIVATE "$<$:${CUVS_CXX_FLAGS}>") target_compile_definitions( - cuvs_static PUBLIC $<$:CUVS_BUILD_CAGRA_HNSWLIB> - $<$:NVTX_ENABLED> + cuvs_static + PUBLIC $<$:CUVS_BUILD_CAGRA_HNSWLIB> + $<$:NVTX_ENABLED> + $<$:CUVS_ENABLE_JIT_LTO> ) target_include_directories(cuvs_static INTERFACE "$") diff --git a/cpp/src/detail/jit_lto/nvjitlink_checker.hpp b/cpp/src/detail/jit_lto/nvjitlink_checker.hpp index c0ae3f4b0e..c712b41a1b 100644 --- a/cpp/src/detail/jit_lto/nvjitlink_checker.hpp +++ b/cpp/src/detail/jit_lto/nvjitlink_checker.hpp @@ -9,7 +9,6 @@ #include #include -namespace { // We can make a better RAII wrapper around nvjitlinkhandle void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) { @@ -26,4 +25,3 @@ void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) } } } -} // namespace diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh index c5758add3c..fbd280a528 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh @@ -6,7 +6,7 @@ #pragma once #include "../detail/ann_utils.cuh" -#if CUDART_VERSION >= 13000 +#ifdef CUVS_ENABLE_JIT_LTO #include "ivf_flat_interleaved_scan_jit.cuh" #else #include "ivf_flat_interleaved_scan.cuh" diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index 
0ad344684b..bb7ba57a19 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -66,23 +66,6 @@ constexpr auto get_filter_type_tag() } } -// template -// constexpr auto get_metric_tag() -// { -// // Get tags for T and AccT -// auto t_tag = get_data_type_tag(); -// auto acc_tag = get_acc_type_tag(); - -// // Check for euclidean_dist and return templated tag with tag types -// if constexpr (std::is_same_v>) { -// return tag_metric_euclidean{}; -// } -// // Check for inner_prod_dist and return templated tag with tag types -// if constexpr (std::is_same_v>) { -// return tag_metric_inner_product{}; -// } -// } - template constexpr auto get_metric_name() { @@ -116,7 +99,6 @@ constexpr auto get_post_lambda_name() /** * Configure the gridDim.x to maximize GPU occupancy, but reduce the output size */ -// template inline uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMemSize, From 18b2af9dd931b75b04976c759ff7a8d69a96e2b5 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 12 Dec 2025 22:03:36 +0000 Subject: [PATCH 040/158] Generate kernel files in CMake instead of Python --- cpp/CMakeLists.txt | 10 +- .../modules/generate_jit_lto_kernels.cmake | 138 +++-- .../ivf_flat/jit_lto_kernels/filter.cu.in | 32 ++ .../jit_lto_kernels/generate_kernels.py | 514 ------------------ .../interleaved_scan_kernel.cu.in | 40 ++ .../ivf_flat/jit_lto_kernels/metric.cu.in | 36 ++ .../jit_lto_kernels/post_lambda.cu.in | 32 ++ 7 files changed, 241 insertions(+), 561 deletions(-) create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in diff 
--git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d2f4c07b4c..a569ea9cd6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -349,10 +349,7 @@ if(NOT BUILD_CPU_ONLY) # Generate interleaved scan kernel files at build time include(cmake/modules/generate_jit_lto_kernels.cmake) - generate_jit_lto_kernels( - "interleaved_scan" - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py" - ) + generate_jit_lto_kernels() set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/AlgorithmPlanner.cu @@ -606,9 +603,6 @@ if(NOT BUILD_CPU_ONLY) ${FILTER_DEVICE_FUNCTION_FILES} ${POST_LAMBDA_DEVICE_FUNCTION_FILES} ) - # Make sure the kernels are generated before we try to build them - add_dependencies(cuvs_jit_lto_fatbins ${INTERLEAVED_SCAN_KERNELS_TARGET}) - target_compile_definitions(cuvs_jit_lto_fatbins PRIVATE BUILD_KERNEL) target_include_directories( cuvs_jit_lto_fatbins @@ -709,7 +703,6 @@ if(NOT BUILD_CPU_ONLY) ) if(JIT_LTO_COMPILATION) - add_dependencies(cuvs ${INTERLEAVED_SCAN_KERNELS_TARGET}) embed_fatbins(cuvs cuvs_jit_lto_fatbins) endif() @@ -770,7 +763,6 @@ SECTIONS ) if(JIT_LTO_COMPILATION) - add_dependencies(cuvs_static ${INTERLEAVED_SCAN_KERNELS_TARGET}) embed_fatbins(cuvs_static cuvs_jit_lto_fatbins) endif() endif() diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index bef62d7267..a5df3da812 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -1,53 +1,115 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= include_guard(GLOBAL) -# Generate JIT LTO kernel files at build time using a Python generator script Arguments: kernel_name -# - Name of the kernel type (e.g., "interleaved_scan") generator_script - Path to the Python script -# that generates the kernels -function(generate_jit_lto_kernels kernel_name generator_script) - find_package(Python3 REQUIRED COMPONENTS Interpreter) - - set(OUTPUT_BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated_kernels) - set(GENERATED_CMAKE_FILE ${OUTPUT_BASE_DIR}/${kernel_name}.cmake) - - # Generate the kernels at build time - add_custom_command( - OUTPUT ${GENERATED_CMAKE_FILE} - COMMAND ${Python3_EXECUTABLE} ${generator_script} ${OUTPUT_BASE_DIR} ${kernel_name} - DEPENDS ${generator_script} - COMMENT "Generating ${kernel_name} kernel files..." - VERBATIM +function(parse_data_type_configs config data_type acc_type veclens type_abbrev acc_abbrev) + if(config MATCHES [==[^([^,]+),([^,]+),\[([0-9]+(,[0-9]+)*)?\],([^,]+),([^,]+)$]==]) + set("${data_type}" "${CMAKE_MATCH_1}" PARENT_SCOPE) + set("${acc_type}" "${CMAKE_MATCH_2}" PARENT_SCOPE) + string(REPLACE "," ";" veclens_value "${CMAKE_MATCH_3}") + set("${veclens}" "${veclens_value}" PARENT_SCOPE) + set("${type_abbrev}" "${CMAKE_MATCH_5}" PARENT_SCOPE) + set("${acc_abbrev}" "${CMAKE_MATCH_6}" PARENT_SCOPE) + else() + message(FATAL_ERROR "Invalid data type config: ${config}") + endif() +endfunction() + +function(generate_jit_lto_kernels) + set(generated_kernels_dir "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels") + string(TIMESTAMP year "%Y") + + set(capacities 0 1 2 4 8 16 32 64 128 256) + set(ascending_values true false) + set(compute_norm_values true false) + set(data_type_configs + "float,float,[1,4],f,f" + "__half,__half,[1,8],h,h" + "uint8_t,uint32_t,[1,16],uc,ui" + "int8_t,int32_t,[1,16],sc,i" ) + set(idx_type int64_t) + set(idx_abbrev l) + 
set(metric_configs euclidean inner_prod) + set(filter_configs filter_none filter_bitset) + set(post_lambda_configs post_identity post_sqrt post_compose) + + foreach(config IN LISTS data_type_configs) + parse_data_type_configs("${config}" data_type acc_type veclens type_abbrev acc_abbrev) + foreach(veclen IN LISTS veclens) + foreach(capacity IN LISTS capacities) + foreach(ascending IN LISTS ascending_values) + foreach(compute_norm IN LISTS compute_norm_values) + set(filename "${generated_kernels_dir}/interleaved_scan_kernels/interleaved_scan_kernel_${capacity}_${veclen}_${ascending}_${compute_norm}_${type_abbrev}_${acc_abbrev}_${idx_abbrev}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in" + "${filename}" + @ONLY + ) + list(APPEND INTERLEAVED_SCAN_KERNEL_FILES "${filename}") + endforeach() + endforeach() + endforeach() + + foreach(metric_name IN LISTS metric_configs) + if(metric_name STREQUAL "euclidean") + set(header_file "neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh") + elseif(metric_name STREQUAL "inner_prod") + set(header_file "neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh") + endif() - # Create a custom target that depends on the generated CMake file Use a unique target name based - # on the kernel name - set(TARGET_NAME "generate_${kernel_name}_kernels_target") - add_custom_target(${TARGET_NAME} DEPENDS ${GENERATED_CMAKE_FILE}) - - # Only generate if the CMake file doesn't exist - if(NOT EXISTS ${GENERATED_CMAKE_FILE}) - message(VERBOSE "Generating ${kernel_name} kernels for the first time...") - execute_process( - COMMAND ${Python3_EXECUTABLE} ${generator_script} ${OUTPUT_BASE_DIR} ${kernel_name} - RESULT_VARIABLE GENERATION_RESULT - OUTPUT_VARIABLE GENERATION_OUTPUT - ERROR_VARIABLE GENERATION_ERROR + set(filename "${generated_kernels_dir}/metric_device_functions/metric_${metric_name}_${veclen}_${type_abbrev}_${acc_abbrev}.cu") + configure_file( + 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in" + "${filename}" + @ONLY + ) + list(APPEND METRIC_DEVICE_FUNCTION_FILES "${filename}") + endforeach() + endforeach() + endforeach() + + foreach(filter_name IN LISTS filter_configs) + if(filter_name STREQUAL "filter_none") + set(header_file "neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh") + elseif(filter_name STREQUAL "filter_bitset") + set(header_file "neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh") + endif() + + set(filename "${generated_kernels_dir}/filter_device_functions/${filter_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in" + "${filename}" + @ONLY ) + list(APPEND FILTER_DEVICE_FUNCTION_FILES "${filename}") + endforeach() - if(NOT GENERATION_RESULT EQUAL 0) - message( - FATAL_ERROR - "Failed to generate kernel files during configuration\nOutput: ${GENERATION_OUTPUT}\nError: ${GENERATION_ERROR}" - ) + foreach(post_lambda_name IN LISTS post_lambda_configs) + if(post_lambda_name STREQUAL "post_identity") + set(header_file "neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh") + elseif(post_lambda_name STREQUAL "post_sqrt") + set(header_file "neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh") + elseif(post_lambda_name STREQUAL "post_compose") + set(header_file "neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh") endif() - endif() - # Include the generated CMake file The generated file handles setting variables to PARENT_SCOPE - include(${GENERATED_CMAKE_FILE}) + set(filename "${generated_kernels_dir}/post_lambda_device_functions/${post_lambda_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in" + "${filename}" + @ONLY + ) + list(APPEND POST_LAMBDA_DEVICE_FUNCTION_FILES "${filename}") + endforeach() + + set(INTERLEAVED_SCAN_KERNEL_FILES "${INTERLEAVED_SCAN_KERNEL_FILES}" PARENT_SCOPE) + set(METRIC_DEVICE_FUNCTION_FILES 
"${METRIC_DEVICE_FUNCTION_FILES}" PARENT_SCOPE) + set(FILTER_DEVICE_FUNCTION_FILES "${FILTER_DEVICE_FUNCTION_FILES}" PARENT_SCOPE) + set(POST_LAMBDA_DEVICE_FUNCTION_FILES "${POST_LAMBDA_DEVICE_FUNCTION_FILES}" PARENT_SCOPE) endfunction() diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in new file mode 100644 index 0000000000..4a6be97fa0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in @@ -0,0 +1,32 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include <@header_file@> + +namespace cuvs::neighbors::ivf_flat::detail { + +// Instantiate the device function template +template __device__ bool sample_filter(int64_t* const* const, const uint32_t, const uint32_t, const uint32_t, uint32_t*, int64_t, int64_t); + +} // namespace cuvs::neighbors::ivf_flat::detail + +#else + +#include +#include "@filter_name@.h" + +__attribute__((__constructor__)) static void register_@filter_name@() +{ + registerAlgorithm( + "@filter_name@", + embedded_@filter_name@, + sizeof(embedded_@filter_name@)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py deleted file mode 100644 index 8afa1baa20..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/generate_kernels.py +++ /dev/null @@ -1,514 +0,0 @@ -# ============================================================================= -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. -# SPDX-License-Identifier: Apache-2.0 -# ============================================================================= - -#!/usr/bin/env python3 -""" -Generate CUDA kernel instantiation files for IVF-Flat interleaved scan. 
-This script generates kernel files programmatically based on type combinations. -""" - -from pathlib import Path -import itertools - - -# Define the parameter space for kernel generation -CAPACITIES = [0, 1, 2, 4, 8, 16, 32, 64, 128, 256] -ASCENDING_VALUES = [True, False] -COMPUTE_NORM_VALUES = [True, False] - -# Data type configurations: (data_type, acc_type, veclens, type_abbrev, acc_abbrev) -# Each data type has veclen=1 and one optimized larger veclen -DATA_TYPE_CONFIGS = [ - ("float", "float", [1, 4], "f", "f"), - ("__half", "__half", [1, 8], "h", "h"), - ("uint8_t", "uint32_t", [1, 16], "uc", "ui"), - ("int8_t", "int32_t", [1, 16], "sc", "i"), -] - -IDX_TYPE = "int64_t" -IDX_TYPE_ABBREV = "l" - -# Metric configurations for device functions -METRIC_CONFIGS = [ - "euclidean", - "inner_prod", -] - -# Filter configurations -FILTER_CONFIGS = [ - "filter_none", - "filter_bitset", -] - -# Post lambda configurations -POST_LAMBDA_CONFIGS = [ - "post_identity", - "post_sqrt", - "post_compose", -] - - -def generate_kernel_combinations(): - """Generate all valid kernel parameter combinations.""" - kernels = [] - - for ( - data_type, - acc_type, - veclens, - type_abbrev, - acc_abbrev, - ) in DATA_TYPE_CONFIGS: - for capacity, veclen, ascending, compute_norm in itertools.product( - CAPACITIES, veclens, ASCENDING_VALUES, COMPUTE_NORM_VALUES - ): - kernels.append( - { - "capacity": capacity, - "veclen": veclen, - "ascending": ascending, - "compute_norm": compute_norm, - "data_type": data_type, - "acc_type": acc_type, - "idx_type": IDX_TYPE, - "type_abbrev": type_abbrev, - "acc_abbrev": acc_abbrev, - "idx_abbrev": IDX_TYPE_ABBREV, - } - ) - - return kernels - - -def generate_filename(params): - """Generate filename from kernel parameters.""" - capacity = params["capacity"] - veclen = params["veclen"] - ascending = "true" if params["ascending"] else "false" - compute_norm = "true" if params["compute_norm"] else "false" - type_abbrev = params["type_abbrev"] - acc_abbrev = 
params["acc_abbrev"] - idx_abbrev = params["idx_abbrev"] - - return f"interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{type_abbrev}_{acc_abbrev}_{idx_abbrev}.cu" - - -def generate_cuda_file_content(params): - """Generate the content of a CUDA kernel instantiation file.""" - capacity = params["capacity"] - veclen = params["veclen"] - ascending = "true" if params["ascending"] else "false" - compute_norm = "true" if params["compute_norm"] else "false" - data_type = params["data_type"] - acc_type = params["acc_type"] - idx_type = params["idx_type"] - - content = f"""/* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::ivf_flat::detail {{ - -// Instantiate the kernel template -template __global__ void interleaved_scan_kernel<{capacity}, {veclen}, {ascending}, {compute_norm}, {data_type}, {acc_type}, {idx_type}>( - const uint32_t, const {data_type}*, const uint32_t*, const {data_type}* const*, const uint32_t*, - const uint32_t, const uint32_t, const uint32_t, const uint32_t, const uint32_t*, const uint32_t, - {idx_type}* const* const, uint32_t*, {idx_type}, {idx_type}, uint32_t*, float*); - -}} // namespace cuvs::neighbors::ivf_flat::detail - -#else - -#include -#include -#include "interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params["type_abbrev"]}_{params["acc_abbrev"]}_{params["idx_abbrev"]}.h" - -using namespace cuvs::neighbors::ivf_flat::detail; - -__attribute__((__constructor__)) static void register_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params["type_abbrev"]}_{params["acc_abbrev"]}_{params["idx_abbrev"]}() -{{ - registerAlgorithm( - "interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}", - 
embedded_interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params["type_abbrev"]}_{params["acc_abbrev"]}_{params["idx_abbrev"]}, - sizeof(embedded_interleaved_scan_kernel_{capacity}_{veclen}_{ascending}_{compute_norm}_{params["type_abbrev"]}_{params["acc_abbrev"]}_{params["idx_abbrev"]})); -}} - -#endif -""" - return content - - -def generate_metric_device_function_content( - metric_name, veclen, data_type, acc_type -): - """Generate content for a metric device function file.""" - type_abbrev = { - "float": "f", - "__half": "h", - "uint8_t": "uc", - "int8_t": "sc", - }[data_type] - acc_abbrev = { - "float": "f", - "__half": "h", - "uint32_t": "ui", - "int32_t": "i", - }[acc_type] - - if metric_name == "euclidean": - header_file = ( - "neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh" - ) - else: # inner_prod - header_file = ( - "neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh" - ) - - content = f"""/* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include <{header_file}> - -namespace cuvs::neighbors::ivf_flat::detail {{ - -// Instantiate the device function template -template __device__ void compute_dist<{veclen}, {data_type}, {acc_type}>({acc_type}&, {acc_type}, {acc_type}); - -}} // namespace cuvs::neighbors::ivf_flat::detail - -#else - -#include -#include -#include "metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}.h" - -using namespace cuvs::neighbors::ivf_flat::detail; - -__attribute__((__constructor__)) static void register_metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}() -{{ - registerAlgorithm( - "{metric_name}_{veclen}", - embedded_metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}, - sizeof(embedded_metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev})); -}} - -#endif -""" - return content - - -def generate_filter_device_function_content(filter_name): - """Generate content for a filter device function file.""" - if filter_name == "filter_none": - header_file = "neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh" - else: # filter_bitset - header_file = "neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh" - - content = f"""/* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include <{header_file}> - -namespace cuvs::neighbors::ivf_flat::detail {{ - -// Instantiate the device function template -template __device__ bool sample_filter(int64_t* const* const, const uint32_t, const uint32_t, const uint32_t, uint32_t*, int64_t, int64_t); - -}} // namespace cuvs::neighbors::ivf_flat::detail - -#else - -#include -#include "{filter_name}.h" - -__attribute__((__constructor__)) static void register_{filter_name}() -{{ - registerAlgorithm( - "{filter_name}", - embedded_{filter_name}, - sizeof(embedded_{filter_name})); -}} - -#endif -""" - return content - - -def generate_post_lambda_device_function_content(post_lambda_name): - """Generate content for a post lambda device function file.""" - if post_lambda_name == "post_identity": - header_file = "neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh" - elif post_lambda_name == "post_sqrt": - header_file = "neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh" - else: # post_compose - header_file = "neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh" - - content = f"""/* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include <{header_file}> - -namespace cuvs::neighbors::ivf_flat::detail {{ - -// Instantiate the device function template -template __device__ float post_process(float); - -}} // namespace cuvs::neighbors::ivf_flat::detail - -#else - -#include -#include "{post_lambda_name}.h" - -__attribute__((__constructor__)) static void register_{post_lambda_name}() -{{ - registerAlgorithm( - "{post_lambda_name}", - embedded_{post_lambda_name}, - sizeof(embedded_{post_lambda_name})); -}} - -#endif -""" - return content - - -def generate_metric_device_functions(output_base_dir): - """Generate all metric device function files.""" - metric_dir = output_base_dir / "metric_device_functions" - metric_dir.mkdir(parents=True, exist_ok=True) - - metric_files = [] - - for metric_name in METRIC_CONFIGS: - for ( - data_type, - acc_type, - veclens, - type_abbrev, - acc_abbrev, - ) in DATA_TYPE_CONFIGS: - for veclen in veclens: - filename = f"metric_{metric_name}_{veclen}_{type_abbrev}_{acc_abbrev}.cu" - filepath = metric_dir / filename - - content = generate_metric_device_function_content( - metric_name, veclen, data_type, acc_type - ) - - # Only write if content has changed - if not filepath.exists() or filepath.read_text() != content: - filepath.write_text(content) - - metric_files.append(filename) - - return metric_files - - -def generate_filter_device_functions(output_base_dir): - """Generate all filter device function files.""" - filter_dir = output_base_dir / "filter_device_functions" - filter_dir.mkdir(parents=True, exist_ok=True) - - filter_files = [] - - for filter_name in FILTER_CONFIGS: - filename = f"{filter_name}.cu" - filepath = filter_dir / filename - - content = generate_filter_device_function_content(filter_name) - - # Only write if content has changed - if not filepath.exists() or filepath.read_text() != content: - filepath.write_text(content) - - filter_files.append(filename) - - return filter_files - - -def 
generate_post_lambda_device_functions(output_base_dir): - """Generate all post lambda device function files.""" - post_lambda_dir = output_base_dir / "post_lambda_device_functions" - post_lambda_dir.mkdir(parents=True, exist_ok=True) - - post_lambda_files = [] - - for post_lambda_name in POST_LAMBDA_CONFIGS: - filename = f"{post_lambda_name}.cu" - filepath = post_lambda_dir / filename - - content = generate_post_lambda_device_function_content( - post_lambda_name - ) - - # Only write if content has changed - if not filepath.exists() or filepath.read_text() != content: - filepath.write_text(content) - - post_lambda_files.append(filename) - - return post_lambda_files - - -def main(): - import sys - - # Get the script directory - script_dir = Path(__file__).parent.absolute() - - # Output directory - use CMAKE_CURRENT_BINARY_DIR if provided, otherwise use source dir - output_base_dir = ( - Path(sys.argv[1]).absolute() if len(sys.argv) > 1 else script_dir - ) - - # Kernel name - use provided name if available, otherwise default to "interleaved_scan" - kernel_name = sys.argv[2] if len(sys.argv) > 2 else "interleaved_scan" - - output_dir = output_base_dir / "interleaved_scan_kernels" - output_dir.mkdir(parents=True, exist_ok=True) - - kernels = generate_kernel_combinations() - - # Generate kernel files - generated_files = [] - for params in kernels: - filename = generate_filename(params) - filepath = output_dir / filename - - content = generate_cuda_file_content(params) - - # Only write if content has changed - if not filepath.exists() or filepath.read_text() != content: - filepath.write_text(content) - - generated_files.append(filename) - - # Generate metric device function files - metric_files = generate_metric_device_functions(output_base_dir) - - # Generate filter device function files - filter_files = generate_filter_device_functions(output_base_dir) - - # Generate post lambda device function files - post_lambda_files = 
generate_post_lambda_device_functions(output_base_dir) - - # Generate CMake file listing all generated files - cmake_file = output_base_dir / f"{kernel_name}.cmake" - - cmake_content = "# Auto-generated file listing all kernel and device function files\n\n" - - # Set relative path lists - cmake_content += "set(INTERLEAVED_SCAN_KERNEL_FILES\n" - for filename in sorted(generated_files): - cmake_content += ( - f" generated_kernels/interleaved_scan_kernels/{filename}\n" - ) - cmake_content += ")\n\n" - - cmake_content += "set(METRIC_DEVICE_FUNCTION_FILES\n" - for filename in sorted(metric_files): - cmake_content += ( - f" generated_kernels/metric_device_functions/{filename}\n" - ) - cmake_content += ")\n\n" - - cmake_content += "set(FILTER_DEVICE_FUNCTION_FILES\n" - for filename in sorted(filter_files): - cmake_content += ( - f" generated_kernels/filter_device_functions/{filename}\n" - ) - cmake_content += ")\n\n" - - cmake_content += "set(POST_LAMBDA_DEVICE_FUNCTION_FILES\n" - for filename in sorted(post_lambda_files): - cmake_content += ( - f" generated_kernels/post_lambda_device_functions/{filename}\n" - ) - cmake_content += ")\n\n" - - # Add logic to prepend CMAKE_CURRENT_BINARY_DIR and set variables to PARENT_SCOPE - cmake_content += f"""# Prepend the binary directory path to all kernel files -set(FULL_PATH_KERNEL_FILES) -foreach(kernel_file ${{INTERLEAVED_SCAN_KERNEL_FILES}}) - list(APPEND FULL_PATH_KERNEL_FILES ${{CMAKE_CURRENT_BINARY_DIR}}/${{kernel_file}}) -endforeach() - -# Prepend the binary directory path to all metric device function files -set(FULL_PATH_METRIC_FILES) -foreach(metric_file ${{METRIC_DEVICE_FUNCTION_FILES}}) - list(APPEND FULL_PATH_METRIC_FILES ${{CMAKE_CURRENT_BINARY_DIR}}/${{metric_file}}) -endforeach() - -# Prepend the binary directory path to all filter device function files -set(FULL_PATH_FILTER_FILES) -foreach(filter_file ${{FILTER_DEVICE_FUNCTION_FILES}}) - list(APPEND FULL_PATH_FILTER_FILES 
${{CMAKE_CURRENT_BINARY_DIR}}/${{filter_file}}) -endforeach() - -# Prepend the binary directory path to all post lambda device function files -set(FULL_PATH_POST_LAMBDA_FILES) -foreach(post_lambda_file ${{POST_LAMBDA_DEVICE_FUNCTION_FILES}}) - list(APPEND FULL_PATH_POST_LAMBDA_FILES ${{CMAKE_CURRENT_BINARY_DIR}}/${{post_lambda_file}}) -endforeach() - -# Return the lists to parent scope -set(INTERLEAVED_SCAN_KERNEL_FILES - ${{FULL_PATH_KERNEL_FILES}} - PARENT_SCOPE -) -set(METRIC_DEVICE_FUNCTION_FILES - ${{FULL_PATH_METRIC_FILES}} - PARENT_SCOPE -) -set(FILTER_DEVICE_FUNCTION_FILES - ${{FULL_PATH_FILTER_FILES}} - PARENT_SCOPE -) -set(POST_LAMBDA_DEVICE_FUNCTION_FILES - ${{FULL_PATH_POST_LAMBDA_FILES}} - PARENT_SCOPE -) -set(INTERLEAVED_SCAN_KERNELS_TARGET - generate_{kernel_name}_kernels_target - PARENT_SCOPE -) -""" - - # Only write if content has changed - if not cmake_file.exists() or cmake_file.read_text() != cmake_content: - cmake_file.write_text(cmake_content) - - -if __name__ == "__main__": - main() diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in new file mode 100644 index 0000000000..b8ce64c5ea --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in @@ -0,0 +1,40 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::ivf_flat::detail { + +// Instantiate the kernel template +template __global__ void interleaved_scan_kernel<@capacity@, @veclen@, @ascending@, @compute_norm@, @data_type@, @acc_type@, @idx_type@>( + const uint32_t, const @data_type@*, const uint32_t*, const @data_type@* const*, const uint32_t*, + const uint32_t, const uint32_t, const uint32_t, const uint32_t, const uint32_t*, const uint32_t, + @idx_type@* const* const, uint32_t*, @idx_type@, @idx_type@, uint32_t*, float*); + +} // namespace cuvs::neighbors::ivf_flat::detail + +#else + +#include +#include +#include "interleaved_scan_kernel_@capacity@_@veclen@_@ascending@_@compute_norm@_@type_abbrev@_@acc_abbrev@_@idx_abbrev@.h" + +using namespace cuvs::neighbors::ivf_flat::detail; + +__attribute__((__constructor__)) static void register_kernel_@capacity@_@veclen@_@ascending@_@compute_norm@_@type_abbrev@_@acc_abbrev@_@idx_abbrev@() +{ + registerAlgorithm( + "interleaved_scan_kernel_@capacity@_@veclen@_@ascending@_@compute_norm@", + embedded_interleaved_scan_kernel_@capacity@_@veclen@_@ascending@_@compute_norm@_@type_abbrev@_@acc_abbrev@_@idx_abbrev@, + sizeof(embedded_interleaved_scan_kernel_@capacity@_@veclen@_@ascending@_@compute_norm@_@type_abbrev@_@acc_abbrev@_@idx_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in new file mode 100644 index 0000000000..11fb87b361 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in @@ -0,0 +1,36 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include <@header_file@> + +namespace cuvs::neighbors::ivf_flat::detail { + +// Instantiate the device function template +template __device__ void compute_dist<@veclen@, @data_type@, @acc_type@>(@acc_type@&, @acc_type@, @acc_type@); + +} // namespace cuvs::neighbors::ivf_flat::detail + +#else + +#include +#include +#include "metric_@metric_name@_@veclen@_@type_abbrev@_@acc_abbrev@.h" + +using namespace cuvs::neighbors::ivf_flat::detail; + +__attribute__((__constructor__)) static void register_metric_@metric_name@_@veclen@_@type_abbrev@_@acc_abbrev@() +{ + registerAlgorithm( + "@metric_name@_@veclen@", + embedded_metric_@metric_name@_@veclen@_@type_abbrev@_@acc_abbrev@, + sizeof(embedded_metric_@metric_name@_@veclen@_@type_abbrev@_@acc_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in new file mode 100644 index 0000000000..7cb702122c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in @@ -0,0 +1,32 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include <@header_file@> + +namespace cuvs::neighbors::ivf_flat::detail { + +// Instantiate the device function template +template __device__ float post_process(float); + +} // namespace cuvs::neighbors::ivf_flat::detail + +#else + +#include +#include "@post_lambda_name@.h" + +__attribute__((__constructor__)) static void register_@post_lambda_name@() +{ + registerAlgorithm( + "@post_lambda_name@", + embedded_@post_lambda_name@, + sizeof(embedded_@post_lambda_name@)); +} + +#endif From 8ce70c221ae210c2049cecd83d6a6335126dc3d0 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 12 Dec 2025 22:15:32 +0000 Subject: [PATCH 041/158] Style --- cpp/cmake/modules/generate_jit_lto_kernels.cmake | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index a5df3da812..aafb9c557f 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -9,12 +9,12 @@ include_guard(GLOBAL) function(parse_data_type_configs config data_type acc_type veclens type_abbrev acc_abbrev) if(config MATCHES [==[^([^,]+),([^,]+),\[([0-9]+(,[0-9]+)*)?\],([^,]+),([^,]+)$]==]) - set("${data_type}" "${CMAKE_MATCH_1}" PARENT_SCOPE) - set("${acc_type}" "${CMAKE_MATCH_2}" PARENT_SCOPE) + set(${data_type} "${CMAKE_MATCH_1}" PARENT_SCOPE) + set(${acc_type} "${CMAKE_MATCH_2}" PARENT_SCOPE) string(REPLACE "," ";" veclens_value "${CMAKE_MATCH_3}") - set("${veclens}" "${veclens_value}" PARENT_SCOPE) - set("${type_abbrev}" "${CMAKE_MATCH_5}" PARENT_SCOPE) - set("${acc_abbrev}" "${CMAKE_MATCH_6}" PARENT_SCOPE) + set(${veclens} "${veclens_value}" PARENT_SCOPE) + set(${type_abbrev} "${CMAKE_MATCH_5}" PARENT_SCOPE) + set(${acc_abbrev} "${CMAKE_MATCH_6}" PARENT_SCOPE) else() message(FATAL_ERROR "Invalid data type config: ${config}") endif() From fdc42399f5b6ecedb90161f08c9dc6ae5a1800fc Mon Sep 17 00:00:00 2001 From: 
Kyle Edwards Date: Fri, 12 Dec 2025 22:34:47 +0000 Subject: [PATCH 042/158] Style --- .../modules/generate_jit_lto_kernels.cmake | 70 +++++++++++++------ 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index aafb9c557f..25e2735039 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -9,12 +9,27 @@ include_guard(GLOBAL) function(parse_data_type_configs config data_type acc_type veclens type_abbrev acc_abbrev) if(config MATCHES [==[^([^,]+),([^,]+),\[([0-9]+(,[0-9]+)*)?\],([^,]+),([^,]+)$]==]) - set(${data_type} "${CMAKE_MATCH_1}" PARENT_SCOPE) - set(${acc_type} "${CMAKE_MATCH_2}" PARENT_SCOPE) + set(${data_type} + "${CMAKE_MATCH_1}" + PARENT_SCOPE + ) + set(${acc_type} + "${CMAKE_MATCH_2}" + PARENT_SCOPE + ) string(REPLACE "," ";" veclens_value "${CMAKE_MATCH_3}") - set(${veclens} "${veclens_value}" PARENT_SCOPE) - set(${type_abbrev} "${CMAKE_MATCH_5}" PARENT_SCOPE) - set(${acc_abbrev} "${CMAKE_MATCH_6}" PARENT_SCOPE) + set(${veclens} + "${veclens_value}" + PARENT_SCOPE + ) + set(${type_abbrev} + "${CMAKE_MATCH_5}" + PARENT_SCOPE + ) + set(${acc_abbrev} + "${CMAKE_MATCH_6}" + PARENT_SCOPE + ) else() message(FATAL_ERROR "Invalid data type config: ${config}") endif() @@ -27,11 +42,8 @@ function(generate_jit_lto_kernels) set(capacities 0 1 2 4 8 16 32 64 128 256) set(ascending_values true false) set(compute_norm_values true false) - set(data_type_configs - "float,float,[1,4],f,f" - "__half,__half,[1,8],h,h" - "uint8_t,uint32_t,[1,16],uc,ui" - "int8_t,int32_t,[1,16],sc,i" + set(data_type_configs "float,float,[1,4],f,f" "__half,__half,[1,8],h,h" + "uint8_t,uint32_t,[1,16],uc,ui" "int8_t,int32_t,[1,16],sc,i" ) set(idx_type int64_t) set(idx_abbrev l) @@ -45,7 +57,9 @@ function(generate_jit_lto_kernels) foreach(capacity IN LISTS capacities) foreach(ascending IN LISTS ascending_values) 
foreach(compute_norm IN LISTS compute_norm_values) - set(filename "${generated_kernels_dir}/interleaved_scan_kernels/interleaved_scan_kernel_${capacity}_${veclen}_${ascending}_${compute_norm}_${type_abbrev}_${acc_abbrev}_${idx_abbrev}.cu") + set(filename + "${generated_kernels_dir}/interleaved_scan_kernels/interleaved_scan_kernel_${capacity}_${veclen}_${ascending}_${compute_norm}_${type_abbrev}_${acc_abbrev}_${idx_abbrev}.cu" + ) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in" "${filename}" @@ -63,7 +77,9 @@ function(generate_jit_lto_kernels) set(header_file "neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh") endif() - set(filename "${generated_kernels_dir}/metric_device_functions/metric_${metric_name}_${veclen}_${type_abbrev}_${acc_abbrev}.cu") + set(filename + "${generated_kernels_dir}/metric_device_functions/metric_${metric_name}_${veclen}_${type_abbrev}_${acc_abbrev}.cu" + ) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in" "${filename}" @@ -81,11 +97,12 @@ function(generate_jit_lto_kernels) set(header_file "neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh") endif() - set(filename "${generated_kernels_dir}/filter_device_functions/${filter_name}.cu") + set(filename + "${generated_kernels_dir}/filter_device_functions/${filter_name}.cu" + ) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in" - "${filename}" - @ONLY + "${filename}" @ONLY ) list(APPEND FILTER_DEVICE_FUNCTION_FILES "${filename}") endforeach() @@ -102,14 +119,25 @@ function(generate_jit_lto_kernels) set(filename "${generated_kernels_dir}/post_lambda_device_functions/${post_lambda_name}.cu") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in" - "${filename}" - @ONLY + "${filename}" @ONLY ) list(APPEND POST_LAMBDA_DEVICE_FUNCTION_FILES "${filename}") endforeach() - 
set(INTERLEAVED_SCAN_KERNEL_FILES "${INTERLEAVED_SCAN_KERNEL_FILES}" PARENT_SCOPE) - set(METRIC_DEVICE_FUNCTION_FILES "${METRIC_DEVICE_FUNCTION_FILES}" PARENT_SCOPE) - set(FILTER_DEVICE_FUNCTION_FILES "${FILTER_DEVICE_FUNCTION_FILES}" PARENT_SCOPE) - set(POST_LAMBDA_DEVICE_FUNCTION_FILES "${POST_LAMBDA_DEVICE_FUNCTION_FILES}" PARENT_SCOPE) + set(INTERLEAVED_SCAN_KERNEL_FILES + "${INTERLEAVED_SCAN_KERNEL_FILES}" + PARENT_SCOPE + ) + set(METRIC_DEVICE_FUNCTION_FILES + "${METRIC_DEVICE_FUNCTION_FILES}" + PARENT_SCOPE + ) + set(FILTER_DEVICE_FUNCTION_FILES + "${FILTER_DEVICE_FUNCTION_FILES}" + PARENT_SCOPE + ) + set(POST_LAMBDA_DEVICE_FUNCTION_FILES + "${POST_LAMBDA_DEVICE_FUNCTION_FILES}" + PARENT_SCOPE + ) endfunction() From be3cf0da11443dabb4af4a82caec82e194196a24 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 12 Dec 2025 22:40:12 +0000 Subject: [PATCH 043/158] Style --- cpp/cmake/modules/generate_jit_lto_kernels.cmake | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 25e2735039..c3fc231e71 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -82,8 +82,7 @@ function(generate_jit_lto_kernels) ) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in" - "${filename}" - @ONLY + "${filename}" @ONLY ) list(APPEND METRIC_DEVICE_FUNCTION_FILES "${filename}") endforeach() @@ -97,9 +96,7 @@ function(generate_jit_lto_kernels) set(header_file "neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh") endif() - set(filename - "${generated_kernels_dir}/filter_device_functions/${filter_name}.cu" - ) + set(filename "${generated_kernels_dir}/filter_device_functions/${filter_name}.cu") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in" "${filename}" @ONLY From 
7e644c38775c6417fb213abe21b94172dc8d08ea Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 12 Dec 2025 22:48:58 +0000 Subject: [PATCH 044/158] Lint --- .../modules/generate_jit_lto_kernels.cmake | 62 ++++++++++++------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index c3fc231e71..6efe4d40be 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -7,29 +7,45 @@ include_guard(GLOBAL) -function(parse_data_type_configs config data_type acc_type veclens type_abbrev acc_abbrev) +function(parse_data_type_configs config) + set(options) + set(one_value DATA_TYPE ACC_TYPE VECLENS TYPE_ABBREV ACC_ABBREV) + set(multi_value) + + cmake_parse_arguments(_JIT_LTO "${options}" "${one_value}" "${multi_value}" ${ARGN}) + if(config MATCHES [==[^([^,]+),([^,]+),\[([0-9]+(,[0-9]+)*)?\],([^,]+),([^,]+)$]==]) - set(${data_type} - "${CMAKE_MATCH_1}" - PARENT_SCOPE - ) - set(${acc_type} - "${CMAKE_MATCH_2}" - PARENT_SCOPE - ) - string(REPLACE "," ";" veclens_value "${CMAKE_MATCH_3}") - set(${veclens} - "${veclens_value}" - PARENT_SCOPE - ) - set(${type_abbrev} - "${CMAKE_MATCH_5}" - PARENT_SCOPE - ) - set(${acc_abbrev} - "${CMAKE_MATCH_6}" - PARENT_SCOPE - ) + if(_JIT_LTO_DATA_TYPE) + set(${_JIT_LTO_DATA_TYPE} + "${CMAKE_MATCH_1}" + PARENT_SCOPE + ) + endif() + if(_JIT_LTO_ACC_TYPE) + set(${_JIT_LTO_ACC_TYPE} + "${CMAKE_MATCH_2}" + PARENT_SCOPE + ) + endif() + if(_JIT_LTO_VECLENS) + string(REPLACE "," ";" veclens_value "${CMAKE_MATCH_3}") + set(${_JIT_LTO_VECLENS} + "${veclens_value}" + PARENT_SCOPE + ) + endif() + if(_JIT_LTO_TYPE_ABBREV) + set(${_JIT_LTO_TYPE_ABBREV} + "${CMAKE_MATCH_5}" + PARENT_SCOPE + ) + endif() + if(_JIT_LTO_ACC_ABBREV) + set(${_JIT_LTO_ACC_ABBREV} + "${CMAKE_MATCH_6}" + PARENT_SCOPE + ) + endif() else() message(FATAL_ERROR "Invalid data type config: ${config}") endif() @@ -52,7 
+68,7 @@ function(generate_jit_lto_kernels) set(post_lambda_configs post_identity post_sqrt post_compose) foreach(config IN LISTS data_type_configs) - parse_data_type_configs("${config}" data_type acc_type veclens type_abbrev acc_abbrev) + parse_data_type_configs("${config}" DATA_TYPE data_type ACC_TYPE acc_type VECLENS veclens TYPE_ABBREV type_abbrev ACC_ABBREV acc_abbrev) foreach(veclen IN LISTS veclens) foreach(capacity IN LISTS capacities) foreach(ascending IN LISTS ascending_values) From 235938aa04cb1fd5d1b20ca07aff642b1f310149 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 12 Dec 2025 22:55:05 +0000 Subject: [PATCH 045/158] Style, lint --- cpp/cmake/modules/generate_jit_lto_kernels.cmake | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 6efe4d40be..22f1f1e06f 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -7,7 +7,7 @@ include_guard(GLOBAL) -function(parse_data_type_configs config) +function(parse_jit_lto_data_type_configs config) set(options) set(one_value DATA_TYPE ACC_TYPE VECLENS TYPE_ABBREV ACC_ABBREV) set(multi_value) @@ -51,6 +51,7 @@ function(parse_data_type_configs config) endif() endfunction() +# cmake-lint: disable=R0915 function(generate_jit_lto_kernels) set(generated_kernels_dir "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels") string(TIMESTAMP year "%Y") @@ -68,7 +69,10 @@ function(generate_jit_lto_kernels) set(post_lambda_configs post_identity post_sqrt post_compose) foreach(config IN LISTS data_type_configs) - parse_data_type_configs("${config}" DATA_TYPE data_type ACC_TYPE acc_type VECLENS veclens TYPE_ABBREV type_abbrev ACC_ABBREV acc_abbrev) + parse_jit_lto_data_type_configs( + "${config}" DATA_TYPE data_type ACC_TYPE acc_type VECLENS veclens TYPE_ABBREV type_abbrev + ACC_ABBREV acc_abbrev + ) foreach(veclen IN LISTS veclens) 
foreach(capacity IN LISTS capacities) foreach(ascending IN LISTS ascending_values) From e3b749d07413e82ad803d3bafcc655718046b1f2 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 15:24:13 +0000 Subject: [PATCH 046/158] Fix nvjitlink_checker --- cpp/CMakeLists.txt | 1 + cpp/src/detail/jit_lto/nvjitlink_checker.cpp | 28 ++++++++++++++++++++ cpp/src/detail/jit_lto/nvjitlink_checker.hpp | 18 +------------ 3 files changed, 30 insertions(+), 17 deletions(-) create mode 100644 cpp/src/detail/jit_lto/nvjitlink_checker.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fb178b0641..f2dc114427 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -354,6 +354,7 @@ if(NOT BUILD_CPU_ONLY) set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/AlgorithmPlanner.cu src/detail/jit_lto/FragmentDatabase.cu src/detail/jit_lto/FragmentEntry.cu + src/detail/jit_lto/nvjitlink_checker.cpp ) endif() diff --git a/cpp/src/detail/jit_lto/nvjitlink_checker.cpp b/cpp/src/detail/jit_lto/nvjitlink_checker.cpp new file mode 100644 index 0000000000..fc0edcd27d --- /dev/null +++ b/cpp/src/detail/jit_lto/nvjitlink_checker.cpp @@ -0,0 +1,28 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#include "nvjitlink_checker.hpp" + +#include +#include +#include + +#include + +void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) +{ + if (result != NVJITLINK_SUCCESS) { + RAFT_FAIL("nvJITLink failed with error %s", std::to_string(result).c_str()); + size_t log_size = 0; + result = nvJitLinkGetErrorLogSize(handle, &log_size); + if (result == NVJITLINK_SUCCESS && log_size > 0) { + std::unique_ptr log{new char[log_size]}; + result = nvJitLinkGetErrorLog(handle, log.get()); + if (result == NVJITLINK_SUCCESS) { + RAFT_FAIL("AlgorithmPlanner nvJITLink error log: %s", std::string(log.get()).c_str()); + } + } + } +} diff --git a/cpp/src/detail/jit_lto/nvjitlink_checker.hpp b/cpp/src/detail/jit_lto/nvjitlink_checker.hpp index c712b41a1b..b8a349988e 100644 --- a/cpp/src/detail/jit_lto/nvjitlink_checker.hpp +++ b/cpp/src/detail/jit_lto/nvjitlink_checker.hpp @@ -5,23 +5,7 @@ #pragma once -#include #include -#include // We can make a better RAII wrapper around nvjitlinkhandle -void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) -{ - if (result != NVJITLINK_SUCCESS) { - RAFT_FAIL("nvJITLink failed with error " + std::to_string(result)); - size_t log_size = 0; - result = nvJitLinkGetErrorLogSize(handle, &log_size); - if (result == NVJITLINK_SUCCESS && log_size > 0) { - std::unique_ptr log{new char[log_size]}; - result = nvJitLinkGetErrorLog(handle, log.get()); - if (result == NVJITLINK_SUCCESS) { - RAFT_FAIL("AlgorithmPlanner nvJITLink error log: " + std::string(log.get())); - } - } - } -} +void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result); From f42ae3fd820103ab48d4fd58c99e4b7f849132b4 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 15:35:25 +0000 Subject: [PATCH 047/158] Style --- cpp/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f2dc114427..9010861fb7 
100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -352,8 +352,10 @@ if(NOT BUILD_CPU_ONLY) generate_jit_lto_kernels() set(JIT_LTO_FILES - src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/AlgorithmPlanner.cu - src/detail/jit_lto/FragmentDatabase.cu src/detail/jit_lto/FragmentEntry.cu + src/detail/jit_lto/AlgorithmLauncher.cu + src/detail/jit_lto/AlgorithmPlanner.cu + src/detail/jit_lto/FragmentDatabase.cu + src/detail/jit_lto/FragmentEntry.cu src/detail/jit_lto/nvjitlink_checker.cpp ) endif() From 5ce7aab92ea28b8df5d709c0f1da1f4502db05e3 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 19:40:45 +0000 Subject: [PATCH 048/158] Refactor JIT LTO kernel compilation --- cpp/CMakeLists.txt | 50 +------- .../modules/detail/generate_header.cmake | 53 -------- cpp/cmake/modules/embed_fatbins.cmake | 52 -------- .../modules/generate_jit_lto_kernels.cmake | 117 ++++++++++++++---- 4 files changed, 95 insertions(+), 177 deletions(-) delete mode 100644 cpp/cmake/modules/detail/generate_header.cmake delete mode 100644 cpp/cmake/modules/embed_fatbins.cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b33a878fe1..17cf4b7fc1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -344,12 +344,9 @@ if(NOT BUILD_CPU_ONLY) endif() if(JIT_LTO_COMPILATION) - # this is needed to embed fatbins to JIT at runtime - include(cmake/modules/embed_fatbins.cmake) - # Generate interleaved scan kernel files at build time include(cmake/modules/generate_jit_lto_kernels.cmake) - generate_jit_lto_kernels() + generate_jit_lto_kernels(jit_lto_kernels) set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu @@ -601,41 +598,6 @@ if(NOT BUILD_CPU_ONLY) INTERFACE "$" ) - if(JIT_LTO_COMPILATION) - add_library( - cuvs_jit_lto_fatbins OBJECT - ${INTERLEAVED_SCAN_KERNEL_FILES} ${METRIC_DEVICE_FUNCTION_FILES} - ${FILTER_DEVICE_FUNCTION_FILES} ${POST_LAMBDA_DEVICE_FUNCTION_FILES} - ) - - target_compile_definitions(cuvs_jit_lto_fatbins PRIVATE BUILD_KERNEL) - 
target_include_directories( - cuvs_jit_lto_fatbins - PRIVATE "$" - "$" - "$" - ) - target_compile_options( - cuvs_jit_lto_fatbins PRIVATE -Xfatbin=--compress-all --compress-mode=size - ) - target_compile_options( - cuvs_jit_lto_fatbins PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" - ) - - set_target_properties( - cuvs_jit_lto_fatbins - PROPERTIES CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION ON - CUDA_FATBIN_COMPILATION ON - POSITION_INDEPENDENT_CODE ON - INTERPROCEDURAL_OPTIMIZATION ON - ) - target_link_libraries(cuvs_jit_lto_fatbins PRIVATE rmm::rmm raft::raft CCCL::CCCL) - endif() - # Endian detection include(TestBigEndian) test_big_endian(BIG_ENDIAN) @@ -707,12 +669,9 @@ if(NOT BUILD_CPU_ONLY) $<$:CUDA::nvtx3> PRIVATE nvidia::cutlass::cutlass $ cuvs-cagra-search $<$:CUDA::nvJitLink> + $<$:jit_lto_kernels> ) - if(JIT_LTO_COMPILATION) - embed_fatbins(cuvs cuvs_jit_lto_fatbins) - endif() - # ensure CUDA symbols aren't relocated to the middle of the debug build binaries file( WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld" @@ -769,11 +728,8 @@ SECTIONS $> # header only PRIVATE nvidia::cutlass::cutlass $ $<$:CUDA::nvJitLink> $<$:CUDA::nvtx3> + $:jit_lto_kernels>> ) - - if(JIT_LTO_COMPILATION) - embed_fatbins(cuvs_static cuvs_jit_lto_fatbins) - endif() endif() # ################################################################################################ diff --git a/cpp/cmake/modules/detail/generate_header.cmake b/cpp/cmake/modules/detail/generate_header.cmake deleted file mode 100644 index 03c2680304..0000000000 --- a/cpp/cmake/modules/detail/generate_header.cmake +++ /dev/null @@ -1,53 +0,0 @@ -# ============================================================================= -# cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. 
-# SPDX-License-Identifier: Apache-2.0 -# cmake-format: on -# ============================================================================= - -# Read objects from response file to avoid argument length issues -if(DEFINED OBJECTS_RESPONSE_FILE) - file(READ "${OBJECTS_RESPONSE_FILE}" objects_content) - string(STRIP "${objects_content}" objects_content) - # Split by newlines since we joined with \n in the CMake file - string(REPLACE "\n" ";" objects_list "${objects_content}") -endif() - -# Create output directory if it doesn't exist -file(MAKE_DIRECTORY "${OUTPUT_DIR}") - -set(generated_headers) -foreach(obj ${objects_list}) - # Skip empty entries - if(NOT obj STREQUAL "") - get_filename_component(obj_ext ${obj} EXT) - get_filename_component(obj_name ${obj} NAME_WE) - get_filename_component(obj_dir ${obj} DIRECTORY) - - if(obj_ext MATCHES ".fatbin") - # Generate individual header file for this FATBIN - set(header_file "${OUTPUT_DIR}/${obj_name}.h") - - set(args -c -p 0x0 --name embedded_${obj_name} ${obj}) - execute_process( - COMMAND "${BIN_TO_C_COMMAND}" ${args} - WORKING_DIRECTORY ${obj_dir} - RESULT_VARIABLE result - OUTPUT_VARIABLE output - ERROR_VARIABLE error_var - ) - if(NOT result EQUAL 0) - message(FATAL_ERROR "Failed to process ${obj}: ${error_var}") - endif() - - # Write individual header file - file(WRITE "${header_file}" "${output}") - list(APPEND generated_headers "${header_file}") - endif() - endif() -endforeach() - -# Create a stamp file to indicate completion -file(WRITE "${STAMP_FILE}" "Headers generated: ${generated_headers}") -list(LENGTH generated_headers num_headers) -message(VERBOSE "Generated ${num_headers} individual FATBIN headers") diff --git a/cpp/cmake/modules/embed_fatbins.cmake b/cpp/cmake/modules/embed_fatbins.cmake deleted file mode 100644 index 5cfb74b011..0000000000 --- a/cpp/cmake/modules/embed_fatbins.cmake +++ /dev/null @@ -1,52 +0,0 @@ -# ============================================================================= -# 
cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. -# SPDX-License-Identifier: Apache-2.0 -# cmake-format: on -# ============================================================================= - -include_guard(GLOBAL) - -function(embed_fatbins library_name kernel_target) - find_package(CUDAToolkit REQUIRED) - find_program( - bin_to_c - NAMES bin2c - PATHS ${CUDAToolkit_BIN_DIR} - ) - - set(output_dir ${CMAKE_CURRENT_BINARY_DIR}/${library_name}) - - # Create a response file to avoid "argument list too long" errors - set(objects_response_file ${CMAKE_CURRENT_BINARY_DIR}/embed_fatbins/${library_name}_objects.rsp) - - # Write the objects list to a response file using file(GENERATE) which handles generator - # expressions - file( - GENERATE - OUTPUT "${objects_response_file}" - CONTENT "$,\n>\n" - ) - - # Generate individual headers for each FATBIN object - add_custom_command( - OUTPUT "${output_dir}/headers_generated.stamp" - COMMAND - ${CMAKE_COMMAND} "-DBIN_TO_C_COMMAND=${bin_to_c}" - "-DOBJECTS_RESPONSE_FILE=${objects_response_file}" "-DOUTPUT_DIR=${output_dir}" - "-DSTAMP_FILE=${output_dir}/headers_generated.stamp" -P - ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/detail/generate_header.cmake - VERBATIM - DEPENDS "${objects_response_file}" $ - COMMENT "Converting FATBIN kernels to individual C++ headers" - ) - - # get the sources of `kernel_target` and add them as CUDA sources so we re-compile them to get the - # inline registration logic - get_target_property(output_sources ${kernel_target} SOURCES) - - # add those c++ sources to `library_name` - target_sources(${library_name} PRIVATE "${output_dir}/headers_generated.stamp" ${output_sources}) - target_compile_features(${library_name} PRIVATE cxx_std_20) - target_include_directories(${library_name} PRIVATE ${output_dir}) -endfunction() diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 22f1f1e06f..918e77d125 100644 --- 
a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -7,6 +7,54 @@ include_guard(GLOBAL) +function(embed_jit_lto_fatbin) + set(options) + set(one_value FATBIN_TARGET FATBIN_SOURCE EMBEDDED_TARGET EMBEDDED_HEADER EMBEDDED_ARRAY) + set(multi_value) + + cmake_parse_arguments(_JIT_LTO "${options}" "${one_value}" "${multi_value}" ${ARGN}) + + find_package(CUDAToolkit REQUIRED) + find_program( + bin_to_c + NAMES bin2c + PATHS ${CUDAToolkit_BIN_DIR} + ) + + add_library(${_JIT_LTO_FATBIN_TARGET} OBJECT "${_JIT_LTO_FATBIN_SOURCE}") + target_compile_definitions(${_JIT_LTO_FATBIN_TARGET} PRIVATE BUILD_KERNEL) + target_include_directories( + ${_JIT_LTO_FATBIN_TARGET} + PRIVATE "$" + "$" + "$" + ) + target_compile_options( + ${_JIT_LTO_FATBIN_TARGET} PRIVATE -Xfatbin=--compress-all --compress-mode=size "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + ) + set_target_properties( + ${_JIT_LTO_FATBIN_TARGET} + PROPERTIES CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + CUDA_SEPARABLE_COMPILATION ON + CUDA_FATBIN_COMPILATION ON + POSITION_INDEPENDENT_CODE ON + INTERPROCEDURAL_OPTIMIZATION ON + ) + target_link_libraries(${_JIT_LTO_FATBIN_TARGET} PRIVATE rmm::rmm raft::raft CCCL::CCCL) + + add_custom_command( + OUTPUT "${_JIT_LTO_EMBEDDED_HEADER}" + COMMAND "${bin_to_c}" -c -p 0x0 --name "${_JIT_LTO_EMBEDDED_ARRAY}" --static $ > "${_JIT_LTO_EMBEDDED_HEADER}" + DEPENDS $ + ) + target_sources(${_JIT_LTO_EMBEDDED_TARGET} PRIVATE "${_JIT_LTO_FATBIN_SOURCE}" "${_JIT_LTO_EMBEDDED_HEADER}") + cmake_path(GET _JIT_LTO_EMBEDDED_HEADER PARENT_PATH header_dir) + target_include_directories(${_JIT_LTO_EMBEDDED_TARGET} PRIVATE "${header_dir}") +endfunction() + function(parse_jit_lto_data_type_configs config) set(options) set(one_value DATA_TYPE ACC_TYPE VECLENS TYPE_ABBREV ACC_ABBREV) @@ -52,7 +100,15 @@ function(parse_jit_lto_data_type_configs config) endfunction() # cmake-lint: 
disable=R0915 -function(generate_jit_lto_kernels) +function(generate_jit_lto_kernels target) + add_library(${target} OBJECT) + target_include_directories( + ${target} + PRIVATE "$" + "$" + "$" + ) + set(generated_kernels_dir "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels") string(TIMESTAMP year "%Y") @@ -77,15 +133,22 @@ function(generate_jit_lto_kernels) foreach(capacity IN LISTS capacities) foreach(ascending IN LISTS ascending_values) foreach(compute_norm IN LISTS compute_norm_values) + set(kernel_name "interleaved_scan_kernel_${capacity}_${veclen}_${ascending}_${compute_norm}_${type_abbrev}_${acc_abbrev}_${idx_abbrev}") set(filename - "${generated_kernels_dir}/interleaved_scan_kernels/interleaved_scan_kernel_${capacity}_${veclen}_${ascending}_${compute_norm}_${type_abbrev}_${acc_abbrev}_${idx_abbrev}.cu" + "${generated_kernels_dir}/interleaved_scan_kernels/fatbin_${kernel_name}.cu" ) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in" "${filename}" @ONLY ) - list(APPEND INTERLEAVED_SCAN_KERNEL_FILES "${filename}") + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/interleaved_scan_kernels/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) endforeach() endforeach() endforeach() @@ -97,14 +160,21 @@ function(generate_jit_lto_kernels) set(header_file "neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh") endif() + set(kernel_name "metric_${metric_name}_${veclen}_${type_abbrev}_${acc_abbrev}") set(filename - "${generated_kernels_dir}/metric_device_functions/metric_${metric_name}_${veclen}_${type_abbrev}_${acc_abbrev}.cu" + "${generated_kernels_dir}/metric_device_functions/fatbin_${kernel_name}.cu" ) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in" "${filename}" @ONLY ) - list(APPEND METRIC_DEVICE_FUNCTION_FILES 
"${filename}") + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/metric_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) endforeach() endforeach() endforeach() @@ -116,12 +186,19 @@ function(generate_jit_lto_kernels) set(header_file "neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh") endif() - set(filename "${generated_kernels_dir}/filter_device_functions/${filter_name}.cu") + set(kernel_name "${filter_name}") + set(filename "${generated_kernels_dir}/filter_device_functions/fatbin_${kernel_name}.cu") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in" "${filename}" @ONLY ) - list(APPEND FILTER_DEVICE_FUNCTION_FILES "${filename}") + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/filter_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) endforeach() foreach(post_lambda_name IN LISTS post_lambda_configs) @@ -133,28 +210,18 @@ function(generate_jit_lto_kernels) set(header_file "neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh") endif() + set(kernel_name "${post_lambda_name}") set(filename "${generated_kernels_dir}/post_lambda_device_functions/${post_lambda_name}.cu") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in" "${filename}" @ONLY ) - list(APPEND POST_LAMBDA_DEVICE_FUNCTION_FILES "${filename}") + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/post_lambda_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) endforeach() - - set(INTERLEAVED_SCAN_KERNEL_FILES - "${INTERLEAVED_SCAN_KERNEL_FILES}" - PARENT_SCOPE - ) - 
set(METRIC_DEVICE_FUNCTION_FILES - "${METRIC_DEVICE_FUNCTION_FILES}" - PARENT_SCOPE - ) - set(FILTER_DEVICE_FUNCTION_FILES - "${FILTER_DEVICE_FUNCTION_FILES}" - PARENT_SCOPE - ) - set(POST_LAMBDA_DEVICE_FUNCTION_FILES - "${POST_LAMBDA_DEVICE_FUNCTION_FILES}" - PARENT_SCOPE - ) endfunction() From eaad347dbdce2e3085c770ba7607331292b6ed55 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 19:46:59 +0000 Subject: [PATCH 049/158] Style --- cpp/CMakeLists.txt | 12 ++++--- cpp/cmake/config.json | 9 +++++ .../modules/generate_jit_lto_kernels.cmake | 34 +++++++++++-------- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 17cf4b7fc1..6c67d723e5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -667,8 +667,10 @@ if(NOT BUILD_CPU_ONLY) $> $> $<$:CUDA::nvtx3> - PRIVATE nvidia::cutlass::cutlass $ - cuvs-cagra-search $<$:CUDA::nvJitLink> + PRIVATE nvidia::cutlass::cutlass + $ + cuvs-cagra-search + $<$:CUDA::nvJitLink> $<$:jit_lto_kernels> ) @@ -726,8 +728,10 @@ SECTIONS ${CUVS_CTK_MATH_DEPENDENCIES} $ # needs to be public for DT_NEEDED $> # header only - PRIVATE nvidia::cutlass::cutlass $ - $<$:CUDA::nvJitLink> $<$:CUDA::nvtx3> + PRIVATE nvidia::cutlass::cutlass + $ + $<$:CUDA::nvJitLink> + $<$:CUDA::nvtx3> $:jit_lto_kernels>> ) endif() diff --git a/cpp/cmake/config.json b/cpp/cmake/config.json index a9f1b53007..aa46006a44 100644 --- a/cpp/cmake/config.json +++ b/cpp/cmake/config.json @@ -10,6 +10,15 @@ "ADDITIONAL_DEP": "?", "PATH": "*" } + }, + "embed_jit_lto_fatbin": { + "kwargs": { + "FATBIN_TARGET": 1, + "FATBIN_SOURCE": 1, + "EMBEDDED_TARGET": 1, + "EMBEDDED_HEADER": 1, + "EMBEDDED_ARRAY": 1 + } } } }, diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 918e77d125..64ad210fc5 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -30,27 +30,33 @@ 
function(embed_jit_lto_fatbin) "$" ) target_compile_options( - ${_JIT_LTO_FATBIN_TARGET} PRIVATE -Xfatbin=--compress-all --compress-mode=size "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" + ${_JIT_LTO_FATBIN_TARGET} + PRIVATE -Xfatbin=--compress-all + --compress-mode=size + "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" ) set_target_properties( ${_JIT_LTO_FATBIN_TARGET} PROPERTIES CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION ON - CUDA_FATBIN_COMPILATION ON - POSITION_INDEPENDENT_CODE ON - INTERPROCEDURAL_OPTIMIZATION ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + CUDA_SEPARABLE_COMPILATION ON + CUDA_FATBIN_COMPILATION ON + POSITION_INDEPENDENT_CODE ON + INTERPROCEDURAL_OPTIMIZATION ON ) target_link_libraries(${_JIT_LTO_FATBIN_TARGET} PRIVATE rmm::rmm raft::raft CCCL::CCCL) add_custom_command( OUTPUT "${_JIT_LTO_EMBEDDED_HEADER}" - COMMAND "${bin_to_c}" -c -p 0x0 --name "${_JIT_LTO_EMBEDDED_ARRAY}" --static $ > "${_JIT_LTO_EMBEDDED_HEADER}" + COMMAND "${bin_to_c}" -c -p 0x0 --name "${_JIT_LTO_EMBEDDED_ARRAY}" --static + $ > "${_JIT_LTO_EMBEDDED_HEADER}" DEPENDS $ ) - target_sources(${_JIT_LTO_EMBEDDED_TARGET} PRIVATE "${_JIT_LTO_FATBIN_SOURCE}" "${_JIT_LTO_EMBEDDED_HEADER}") + target_sources( + ${_JIT_LTO_EMBEDDED_TARGET} PRIVATE "${_JIT_LTO_FATBIN_SOURCE}" "${_JIT_LTO_EMBEDDED_HEADER}" + ) cmake_path(GET _JIT_LTO_EMBEDDED_HEADER PARENT_PATH header_dir) target_include_directories(${_JIT_LTO_EMBEDDED_TARGET} PRIVATE "${header_dir}") endfunction() @@ -133,7 +139,9 @@ function(generate_jit_lto_kernels target) foreach(capacity IN LISTS capacities) foreach(ascending IN LISTS ascending_values) foreach(compute_norm IN LISTS compute_norm_values) - set(kernel_name "interleaved_scan_kernel_${capacity}_${veclen}_${ascending}_${compute_norm}_${type_abbrev}_${acc_abbrev}_${idx_abbrev}") + set(kernel_name + 
"interleaved_scan_kernel_${capacity}_${veclen}_${ascending}_${compute_norm}_${type_abbrev}_${acc_abbrev}_${idx_abbrev}" + ) set(filename "${generated_kernels_dir}/interleaved_scan_kernels/fatbin_${kernel_name}.cu" ) @@ -161,9 +169,7 @@ function(generate_jit_lto_kernels target) endif() set(kernel_name "metric_${metric_name}_${veclen}_${type_abbrev}_${acc_abbrev}") - set(filename - "${generated_kernels_dir}/metric_device_functions/fatbin_${kernel_name}.cu" - ) + set(filename "${generated_kernels_dir}/metric_device_functions/fatbin_${kernel_name}.cu") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in" "${filename}" @ONLY From eb3b468a0d1fe823fbab592d66ddcd8e69aaa039 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 21:00:37 +0000 Subject: [PATCH 050/158] pic --- cpp/cmake/modules/generate_jit_lto_kernels.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 64ad210fc5..6cd9d62ddb 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -114,6 +114,10 @@ function(generate_jit_lto_kernels target) "$" "$" ) + set_target_properties( + ${target} + PROPERTIES POSITION_INDEPENDENT_CODE ON + ) set(generated_kernels_dir "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels") string(TIMESTAMP year "%Y") From 912279cf1d46a9604731755a9283459d58b6a6fd Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 21:10:58 +0000 Subject: [PATCH 051/158] style --- cpp/cmake/modules/generate_jit_lto_kernels.cmake | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 6cd9d62ddb..832ae80723 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -114,10 +114,7 @@ 
function(generate_jit_lto_kernels target) "$" "$" ) - set_target_properties( - ${target} - PROPERTIES POSITION_INDEPENDENT_CODE ON - ) + set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON) set(generated_kernels_dir "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels") string(TIMESTAMP year "%Y") From 19f1af3e520bc18f257ebee858dd90a8eeaeaab7 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 21:47:38 +0000 Subject: [PATCH 052/158] Verbose build --- ci/build_standalone_c.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/build_standalone_c.sh b/ci/build_standalone_c.sh index a92490bb3e..7710b0cb85 100755 --- a/ci/build_standalone_c.sh +++ b/ci/build_standalone_c.sh @@ -65,7 +65,7 @@ scl enable gcc-toolset-${TOOLSET_VERSION} -- \ -DBUILD_TESTS=OFF \ -DBUILD_SHARED_LIBS=ON \ -DCUVS_STATIC_RAPIDS_LIBRARIES=ON -cmake --build cpp/build "-j${PARALLEL_LEVEL}" +cmake --build cpp/build "-j${PARALLEL_LEVEL}" -v sccache --show-adv-stats sccache --stop-server >/dev/null 2>&1 || true @@ -78,7 +78,7 @@ scl enable gcc-toolset-${TOOLSET_VERSION} -- \ -DCUVSC_STATIC_CUVS_LIBRARY=ON \ -DCMAKE_PREFIX_PATH="$PWD/cpp/build/" \ -DBUILD_TESTS=${BUILD_C_LIB_TESTS} -cmake --build c/build "-j${PARALLEL_LEVEL}" +cmake --build c/build "-j${PARALLEL_LEVEL}" -v sccache --show-adv-stats sccache --stop-server >/dev/null 2>&1 || true From 087b943644098793099e42ce989db13aec132166 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 17:33:25 -0500 Subject: [PATCH 053/158] static --- cpp/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6c67d723e5..76adb0affc 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -732,8 +732,9 @@ SECTIONS $ $<$:CUDA::nvJitLink> $<$:CUDA::nvtx3> - $:jit_lto_kernels>> ) + # Can't do target_link_libraries() on this one, because it's a static library + target_sources(cuvs_static PRIVATE $<$:$>) endif() # 
################################################################################################ From c16e1093c9b0bbb3e4d52fcae3cba349b5930229 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 22:34:47 +0000 Subject: [PATCH 054/158] style --- cpp/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 76adb0affc..f411a6e816 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -728,13 +728,13 @@ SECTIONS ${CUVS_CTK_MATH_DEPENDENCIES} $ # needs to be public for DT_NEEDED $> # header only - PRIVATE nvidia::cutlass::cutlass - $ - $<$:CUDA::nvJitLink> - $<$:CUDA::nvtx3> + PRIVATE nvidia::cutlass::cutlass $ + $<$:CUDA::nvJitLink> $<$:CUDA::nvtx3> ) # Can't do target_link_libraries() on this one, because it's a static library - target_sources(cuvs_static PRIVATE $<$:$>) + target_sources( + cuvs_static PRIVATE $<$:$> + ) endif() # ################################################################################################ From 323b79fb9c15cd1c67370f9fb6e404baa86a698f Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 15 Dec 2025 22:42:40 +0000 Subject: [PATCH 055/158] TARGET_OBJECTS --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f411a6e816..ee9d4fd1f0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -733,7 +733,7 @@ SECTIONS ) # Can't do target_link_libraries() on this one, because it's a static library target_sources( - cuvs_static PRIVATE $<$:$> + cuvs_static PRIVATE $<$:$> ) endif() From 9f13e73561f70bb8192d18c525a7aebc98a0a464 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 16 Dec 2025 21:32:16 +0000 Subject: [PATCH 056/158] Disable sccache --- ci/build_standalone_c.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/build_standalone_c.sh b/ci/build_standalone_c.sh index 7710b0cb85..9908ff7f5e 100755 --- a/ci/build_standalone_c.sh +++ 
b/ci/build_standalone_c.sh @@ -56,6 +56,8 @@ RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"} mkdir -p "${RAPIDS_ARTIFACTS_DIR}" export RAPIDS_ARTIFACTS_DIR +export SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 + scl enable gcc-toolset-${TOOLSET_VERSION} -- \ cmake -S cpp -B cpp/build/ -GNinja \ -DCMAKE_CUDA_HOST_COMPILER=/opt/rh/gcc-toolset-${TOOLSET_VERSION}/root/usr/bin/gcc \ From eaf9d397269a71d3a54120691488ad61b0bc37b3 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 16 Dec 2025 22:57:13 +0000 Subject: [PATCH 057/158] Recache --- ci/build_standalone_c.sh | 2 +- ci/build_wheel_libcuvs.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/build_standalone_c.sh b/ci/build_standalone_c.sh index 9908ff7f5e..cf8ee99ac2 100755 --- a/ci/build_standalone_c.sh +++ b/ci/build_standalone_c.sh @@ -56,7 +56,7 @@ RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"} mkdir -p "${RAPIDS_ARTIFACTS_DIR}" export RAPIDS_ARTIFACTS_DIR -export SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 +export SCCACHE_RECACHE=1 scl enable gcc-toolset-${TOOLSET_VERSION} -- \ cmake -S cpp -B cpp/build/ -GNinja \ diff --git a/ci/build_wheel_libcuvs.sh b/ci/build_wheel_libcuvs.sh index 00452469e2..11edef19cc 100755 --- a/ci/build_wheel_libcuvs.sh +++ b/ci/build_wheel_libcuvs.sh @@ -29,5 +29,7 @@ rapids-pip-retry install \ # 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) export PIP_NO_BUILD_ISOLATION=0 +export SCCACHE_RECACHE=1 + ci/build_wheel.sh libcuvs ${package_dir} ci/validate_wheel.sh ${package_dir} "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" From ce40c5160ef313816f787c36c155d2fde1665387 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 16 Dec 2025 23:53:29 +0000 Subject: [PATCH 058/158] Revert CI debugging --- ci/build_standalone_c.sh | 6 ++---- ci/build_wheel_libcuvs.sh | 2 -- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/ci/build_standalone_c.sh b/ci/build_standalone_c.sh index 
cf8ee99ac2..a92490bb3e 100755 --- a/ci/build_standalone_c.sh +++ b/ci/build_standalone_c.sh @@ -56,8 +56,6 @@ RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"} mkdir -p "${RAPIDS_ARTIFACTS_DIR}" export RAPIDS_ARTIFACTS_DIR -export SCCACHE_RECACHE=1 - scl enable gcc-toolset-${TOOLSET_VERSION} -- \ cmake -S cpp -B cpp/build/ -GNinja \ -DCMAKE_CUDA_HOST_COMPILER=/opt/rh/gcc-toolset-${TOOLSET_VERSION}/root/usr/bin/gcc \ @@ -67,7 +65,7 @@ scl enable gcc-toolset-${TOOLSET_VERSION} -- \ -DBUILD_TESTS=OFF \ -DBUILD_SHARED_LIBS=ON \ -DCUVS_STATIC_RAPIDS_LIBRARIES=ON -cmake --build cpp/build "-j${PARALLEL_LEVEL}" -v +cmake --build cpp/build "-j${PARALLEL_LEVEL}" sccache --show-adv-stats sccache --stop-server >/dev/null 2>&1 || true @@ -80,7 +78,7 @@ scl enable gcc-toolset-${TOOLSET_VERSION} -- \ -DCUVSC_STATIC_CUVS_LIBRARY=ON \ -DCMAKE_PREFIX_PATH="$PWD/cpp/build/" \ -DBUILD_TESTS=${BUILD_C_LIB_TESTS} -cmake --build c/build "-j${PARALLEL_LEVEL}" -v +cmake --build c/build "-j${PARALLEL_LEVEL}" sccache --show-adv-stats sccache --stop-server >/dev/null 2>&1 || true diff --git a/ci/build_wheel_libcuvs.sh b/ci/build_wheel_libcuvs.sh index 11edef19cc..00452469e2 100755 --- a/ci/build_wheel_libcuvs.sh +++ b/ci/build_wheel_libcuvs.sh @@ -29,7 +29,5 @@ rapids-pip-retry install \ # 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) export PIP_NO_BUILD_ISOLATION=0 -export SCCACHE_RECACHE=1 - ci/build_wheel.sh libcuvs ${package_dir} ci/validate_wheel.sh ${package_dir} "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" From 0d0abb9fc3f115bf365c8ea439b431b1f28ac60a Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 17 Dec 2025 15:25:06 +0000 Subject: [PATCH 059/158] Install and link object library --- cpp/CMakeLists.txt | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ee9d4fd1f0..07e1b24699 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -346,7 +346,7 @@ 
if(NOT BUILD_CPU_ONLY) if(JIT_LTO_COMPILATION) # Generate interleaved scan kernel files at build time include(cmake/modules/generate_jit_lto_kernels.cmake) - generate_jit_lto_kernels(jit_lto_kernels) + generate_jit_lto_kernels(cuvs_jit_lto_kernels) set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu @@ -671,7 +671,7 @@ if(NOT BUILD_CPU_ONLY) $ cuvs-cagra-search $<$:CUDA::nvJitLink> - $<$:jit_lto_kernels> + $<$:cuvs_jit_lto_kernels> ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries @@ -731,9 +731,9 @@ SECTIONS PRIVATE nvidia::cutlass::cutlass $ $<$:CUDA::nvJitLink> $<$:CUDA::nvtx3> ) - # Can't do target_link_libraries() on this one, because it's a static library + # Anything that links against the static library has to have these objects target_sources( - cuvs_static PRIVATE $<$:$> + cuvs_static INTERFACE $<$:$> ) endif() @@ -774,9 +774,9 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB include(GNUInstallDirs) include(CPack) - set(target_names cuvs cuvs_static cuvs_cpp_headers cuvs_c) - set(component_names cuvs_shared cuvs_static cuvs_cpp_headers c_api) - set(export_names cuvs-shared-exports cuvs-static-exports cuvs-cpp-headers-exports cuvs-c-exports) + set(target_names cuvs cuvs_static cuvs_jit_lto_kernels cuvs_cpp_headers cuvs_c) + set(component_names cuvs_shared cuvs_static cuvs_static cuvs_cpp_headers c_api) + set(export_names cuvs-shared-exports cuvs-static-exports cuvs-static-exports cuvs-cpp-headers-exports cuvs-c-exports) foreach(target component export IN ZIP_LISTS target_names component_names export_names) if(TARGET ${target}) install( @@ -817,6 +817,8 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB ) endif() + list(REMOVE_DUPLICATES cuvs_components) + list(REMOVE_DUPLICATES cuvs_export_sets) include(cmake/modules/generate_cuvs_export.cmake) generate_cuvs_export(COMPONENTS ${cuvs_components} EXPORT_SETS ${cuvs_export_sets}) From 84bfa92c1d7826484f24c4c9586a589aec7ab4a9 Mon Sep 17 
00:00:00 2001 From: Kyle Edwards Date: Wed, 17 Dec 2025 15:27:45 +0000 Subject: [PATCH 060/158] Style --- cpp/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 07e1b24699..0618402944 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -733,7 +733,8 @@ SECTIONS ) # Anything that links against the static library has to have these objects target_sources( - cuvs_static INTERFACE $<$:$> + cuvs_static + INTERFACE $<$:$> ) endif() @@ -776,7 +777,9 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB set(target_names cuvs cuvs_static cuvs_jit_lto_kernels cuvs_cpp_headers cuvs_c) set(component_names cuvs_shared cuvs_static cuvs_static cuvs_cpp_headers c_api) - set(export_names cuvs-shared-exports cuvs-static-exports cuvs-static-exports cuvs-cpp-headers-exports cuvs-c-exports) + set(export_names cuvs-shared-exports cuvs-static-exports cuvs-static-exports + cuvs-cpp-headers-exports cuvs-c-exports + ) foreach(target component export IN ZIP_LISTS target_names component_names export_names) if(TARGET ${target}) install( From 21241eba90560e35c1572165ff2d86bf12c399c5 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 17 Dec 2025 15:55:56 +0000 Subject: [PATCH 061/158] Alias --- cpp/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0618402944..46d5ab8d7d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -347,6 +347,7 @@ if(NOT BUILD_CPU_ONLY) # Generate interleaved scan kernel files at build time include(cmake/modules/generate_jit_lto_kernels.cmake) generate_jit_lto_kernels(cuvs_jit_lto_kernels) + add_library(cuvs::cuvs_jit_lto_kernels ALIAS cuvs_jit_lto_kernels) set(JIT_LTO_FILES src/detail/jit_lto/AlgorithmLauncher.cu @@ -671,7 +672,7 @@ if(NOT BUILD_CPU_ONLY) $ cuvs-cagra-search $<$:CUDA::nvJitLink> - $<$:cuvs_jit_lto_kernels> + $<$:cuvs::cuvs_jit_lto_kernels> ) # ensure CUDA symbols 
aren't relocated to the middle of the debug build binaries @@ -734,7 +735,7 @@ SECTIONS # Anything that links against the static library has to have these objects target_sources( cuvs_static - INTERFACE $<$:$> + INTERFACE $<$:$> ) endif() From 7c0ac13e885f92f600148f100770d7febcbb4c2c Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 17 Dec 2025 19:09:28 +0000 Subject: [PATCH 062/158] Make cuvs_jit_lto_kernels a static library --- cpp/CMakeLists.txt | 8 ++------ cpp/cmake/modules/generate_jit_lto_kernels.cmake | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 46d5ab8d7d..f236e56935 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -672,7 +672,7 @@ if(NOT BUILD_CPU_ONLY) $ cuvs-cagra-search $<$:CUDA::nvJitLink> - $<$:cuvs::cuvs_jit_lto_kernels> + $<$:$> ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries @@ -731,11 +731,7 @@ SECTIONS $> # header only PRIVATE nvidia::cutlass::cutlass $ $<$:CUDA::nvJitLink> $<$:CUDA::nvtx3> - ) - # Anything that links against the static library has to have these objects - target_sources( - cuvs_static - INTERFACE $<$:$> + $<$:$> ) endif() diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 832ae80723..87bb772b11 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -107,7 +107,7 @@ endfunction() # cmake-lint: disable=R0915 function(generate_jit_lto_kernels target) - add_library(${target} OBJECT) + add_library(${target} STATIC) target_include_directories( ${target} PRIVATE "$" From 880dbf207e9ab57df2a437dc1746ce28556283c5 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 17 Dec 2025 19:15:07 +0000 Subject: [PATCH 063/158] Style --- cpp/CMakeLists.txt | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 
f236e56935..94c763ea56 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -668,11 +668,12 @@ if(NOT BUILD_CPU_ONLY) $> $> $<$:CUDA::nvtx3> - PRIVATE nvidia::cutlass::cutlass - $ - cuvs-cagra-search - $<$:CUDA::nvJitLink> - $<$:$> + PRIVATE + nvidia::cutlass::cutlass + $ + cuvs-cagra-search + $<$:CUDA::nvJitLink> + $<$:$> ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries @@ -729,9 +730,12 @@ SECTIONS ${CUVS_CTK_MATH_DEPENDENCIES} $ # needs to be public for DT_NEEDED $> # header only - PRIVATE nvidia::cutlass::cutlass $ - $<$:CUDA::nvJitLink> $<$:CUDA::nvtx3> - $<$:$> + PRIVATE + nvidia::cutlass::cutlass + $ + $<$:CUDA::nvJitLink> + $<$:CUDA::nvtx3> + $<$:$> ) endif() From d04d7c1c0e3b94166198029b6f4485d8d9709e7e Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 17 Dec 2025 19:29:24 +0000 Subject: [PATCH 064/158] rapids_cuda_init_architectures() for C tests --- c/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 29655ced6d..0d8c2873aa 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -201,6 +201,8 @@ endif() # ################################################################################################## # * build tests ---------------------------------------------------- if(BUILD_TESTS) + include(rapids-cuda) + rapids_cuda_init_architectures(CUVS_C) enable_language(CUDA) find_package(CUDAToolkit REQUIRED) From 19581f9479aa0886e547a2d01174cf0e6d68949f Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 17 Dec 2025 20:48:36 +0000 Subject: [PATCH 065/158] Be more specific about where we search for libclang --- ci/build_docs.sh | 2 +- ci/build_rust.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index f9ab38721b..3fc3d436df 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -45,7 +45,7 @@ popd rapids-logger "Build Rust docs" pushd rust -LIBCLANG_PATH=$(dirname "$(find /opt/conda -name 
libclang.so | head -n 1)") +LIBCLANG_PATH=$(dirname "$(find "$CONDA_PREFIX" -name libclang.so | head -n 1)") export LIBCLANG_PATH cargo doc -p cuvs --no-deps popd diff --git a/ci/build_rust.sh b/ci/build_rust.sh index 8cc65e7f04..edce9447f2 100755 --- a/ci/build_rust.sh +++ b/ci/build_rust.sh @@ -44,7 +44,7 @@ sccache --stop-server 2>/dev/null || true # we need to set up LIBCLANG_PATH to allow rust bindgen to work, # grab it from the conda env -LIBCLANG_PATH=$(dirname "$(find /opt/conda -name libclang.so | head -n 1)") +LIBCLANG_PATH=$(dirname "$(find "$CONDA_PREFIX" -name libclang.so | head -n 1)") export LIBCLANG_PATH echo "LIBCLANG_PATH=$LIBCLANG_PATH" From a61f0193cb42496e070e5435955093e42dd76d04 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 17 Dec 2025 20:51:23 +0000 Subject: [PATCH 066/158] More libclang updates --- .github/workflows/publish-rust.yaml | 2 +- examples/rust/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-rust.yaml b/.github/workflows/publish-rust.yaml index 3b7fc41a3b..f18e480b3b 100644 --- a/.github/workflows/publish-rust.yaml +++ b/.github/workflows/publish-rust.yaml @@ -44,7 +44,7 @@ jobs: conda activate rust set -eu - LIBCLANG_PATH=$(dirname "$(find /opt/conda -name libclang.so | head -n 1)") + LIBCLANG_PATH=$(dirname "$(find "$CONDA_PREFIX" -name libclang.so | head -n 1)") export LIBCLANG_PATH echo "LIBCLANG_PATH=$LIBCLANG_PATH" diff --git a/examples/rust/README.md b/examples/rust/README.md index 57d7005050..c75047ea16 100644 --- a/examples/rust/README.md +++ b/examples/rust/README.md @@ -16,7 +16,7 @@ You may prefer to use `mamba`, as it provides significant speedup over `conda`. 1. 
Set up the required environment variables: ```bash -LIBCLANG_PATH=$(dirname "$(find /opt/conda -name libclang.so | head -n 1)") +LIBCLANG_PATH=$(dirname "$(find "$CONDA_PREFIX" -name libclang.so | head -n 1)") export LIBCLANG_PATH echo "LIBCLANG_PATH=$LIBCLANG_PATH" ``` From 2eeb91304b982c50ab71ef9be5d5705b0581f7ea Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 17 Dec 2025 21:24:23 +0000 Subject: [PATCH 067/158] Revert "Fix libclang download for Rust, CUDA initialization for C tests" This reverts commit a10ea05db3c9b5c74533e78d1d4a5c918d6fc3b6. --- .github/workflows/publish-rust.yaml | 2 +- c/CMakeLists.txt | 2 -- ci/build_docs.sh | 2 +- ci/build_rust.sh | 2 +- examples/rust/README.md | 2 +- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish-rust.yaml b/.github/workflows/publish-rust.yaml index f18e480b3b..3b7fc41a3b 100644 --- a/.github/workflows/publish-rust.yaml +++ b/.github/workflows/publish-rust.yaml @@ -44,7 +44,7 @@ jobs: conda activate rust set -eu - LIBCLANG_PATH=$(dirname "$(find "$CONDA_PREFIX" -name libclang.so | head -n 1)") + LIBCLANG_PATH=$(dirname "$(find /opt/conda -name libclang.so | head -n 1)") export LIBCLANG_PATH echo "LIBCLANG_PATH=$LIBCLANG_PATH" diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 0d8c2873aa..29655ced6d 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -201,8 +201,6 @@ endif() # ################################################################################################## # * build tests ---------------------------------------------------- if(BUILD_TESTS) - include(rapids-cuda) - rapids_cuda_init_architectures(CUVS_C) enable_language(CUDA) find_package(CUDAToolkit REQUIRED) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 3fc3d436df..f9ab38721b 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -45,7 +45,7 @@ popd rapids-logger "Build Rust docs" pushd rust -LIBCLANG_PATH=$(dirname "$(find "$CONDA_PREFIX" -name libclang.so | head -n 1)") 
+LIBCLANG_PATH=$(dirname "$(find /opt/conda -name libclang.so | head -n 1)") export LIBCLANG_PATH cargo doc -p cuvs --no-deps popd diff --git a/ci/build_rust.sh b/ci/build_rust.sh index edce9447f2..8cc65e7f04 100755 --- a/ci/build_rust.sh +++ b/ci/build_rust.sh @@ -44,7 +44,7 @@ sccache --stop-server 2>/dev/null || true # we need to set up LIBCLANG_PATH to allow rust bindgen to work, # grab it from the conda env -LIBCLANG_PATH=$(dirname "$(find "$CONDA_PREFIX" -name libclang.so | head -n 1)") +LIBCLANG_PATH=$(dirname "$(find /opt/conda -name libclang.so | head -n 1)") export LIBCLANG_PATH echo "LIBCLANG_PATH=$LIBCLANG_PATH" diff --git a/examples/rust/README.md b/examples/rust/README.md index c75047ea16..57d7005050 100644 --- a/examples/rust/README.md +++ b/examples/rust/README.md @@ -16,7 +16,7 @@ You may prefer to use `mamba`, as it provides significant speedup over `conda`. 1. Set up the required environment variables: ```bash -LIBCLANG_PATH=$(dirname "$(find "$CONDA_PREFIX" -name libclang.so | head -n 1)") +LIBCLANG_PATH=$(dirname "$(find /opt/conda -name libclang.so | head -n 1)") export LIBCLANG_PATH echo "LIBCLANG_PATH=$LIBCLANG_PATH" ``` From 088c21e353943ed5a979ea570f7d2f8dcfca477b Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 14 Jan 2026 17:21:19 -0500 Subject: [PATCH 068/158] Copyright --- cpp/cmake/modules/generate_jit_lto_kernels.cmake | 2 +- cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h | 2 +- cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h | 2 +- cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h | 2 +- cpp/include/cuvs/detail/jit_lto/FragmentEntry.h | 2 +- cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h | 2 +- cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h | 2 +- .../cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp | 2 +- cpp/include/cuvs/neighbors/common.hpp | 2 +- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 2 +- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 2 +- cpp/src/detail/jit_lto/FragmentDatabase.cu | 2 +- 
cpp/src/detail/jit_lto/FragmentEntry.cu | 2 +- cpp/src/detail/jit_lto/nvjitlink_checker.cpp | 2 +- cpp/src/detail/jit_lto/nvjitlink_checker.hpp | 2 +- .../ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh | 2 +- cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh | 2 +- cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh | 2 +- cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh | 2 +- cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh | 2 +- .../ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp | 2 +- .../ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh | 2 +- .../neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh | 2 +- cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh | 2 +- cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh | 2 +- cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh | 2 +- python/cuvs_bench/cuvs_bench/run/runners.py | 2 +- 27 files changed, 27 insertions(+), 27 deletions(-) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 87bb772b11..dcf985b391 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h index bcc50d8207..7a578a8306 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h index 94cf419de6..f6f485962b 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h index 007726532e..b3699d7c6d 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h index 39eca2f187..ed0edd59b2 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h index 8a0e54f699..eea74b782a 100644 --- a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h +++ b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h b/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h index bbf3fe0ca3..1910373dcb 100644 --- a/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h +++ b/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp b/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp index b201f7c044..d9ed7e6b0b 100644 --- a/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp +++ b/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 0fff2994fb..5cf73875da 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 5b786e95cf..0a21c75d63 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 2d9e200405..73c6b32d34 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index 2b51630372..89fa415b83 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/detail/jit_lto/FragmentEntry.cu b/cpp/src/detail/jit_lto/FragmentEntry.cu index 2f2832b95f..ecc94faf76 100644 --- a/cpp/src/detail/jit_lto/FragmentEntry.cu +++ b/cpp/src/detail/jit_lto/FragmentEntry.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/detail/jit_lto/nvjitlink_checker.cpp b/cpp/src/detail/jit_lto/nvjitlink_checker.cpp index fc0edcd27d..adea2853bf 100644 --- a/cpp/src/detail/jit_lto/nvjitlink_checker.cpp +++ b/cpp/src/detail/jit_lto/nvjitlink_checker.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/detail/jit_lto/nvjitlink_checker.hpp b/cpp/src/detail/jit_lto/nvjitlink_checker.hpp index b8a349988e..12b062d795 100644 --- a/cpp/src/detail/jit_lto/nvjitlink_checker.hpp +++ b/cpp/src/detail/jit_lto/nvjitlink_checker.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh index fbd280a528..81833a63b1 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index bb7ba57a19..85575c3c5f 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh index f7fc69e4ae..d6d65d8707 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh index 7f171d5729..07fc4a21f5 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh index 61af472469..aad15d64bc 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp index 82124f3409..beab9e858b 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh index f197d3f218..8fd2a4c04b 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh index c6600c526c..afc4c401fa 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh index 6cbb19c3b2..fe3473ba4a 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh index 7ea402a312..04fd825c92 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh index ca23b2a6a5..28009b04e7 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/python/cuvs_bench/cuvs_bench/run/runners.py b/python/cuvs_bench/cuvs_bench/run/runners.py index 243bb2e0e4..0377ea7f45 100644 --- a/python/cuvs_bench/cuvs_bench/run/runners.py +++ b/python/cuvs_bench/cuvs_bench/run/runners.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # From 8ca106277dc0258ff98a1a9dd0bece0fc915a338 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 22 Jan 2026 16:55:20 -0500 Subject: [PATCH 069/158] Apply suggestions from code review Co-authored-by: Dante Gama Dessavre --- cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h | 2 +- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h index f6f485962b..8126fa8866 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h @@ -24,6 +24,6 @@ struct AlgorithmPlanner { private: void add_entrypoint(); void add_device_functions(); - std::string get_device_functions_key(); + std::string get_device_functions_key() const; std::shared_ptr build(); }; diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 73c6b32d34..c5d954ff79 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -34,7 +34,7 @@ void AlgorithmPlanner::add_device_functions() } } -std::string AlgorithmPlanner::get_device_functions_key() +std::string AlgorithmPlanner::get_device_functions_key() const { std::string key = ""; for (const auto& device_function : this->device_functions) { From b8c0d420b4dae9f9fefddda47c332d73f415c862 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 22 Jan 2026 22:10:00 +0000 Subject: [PATCH 070/158] address some review comments --- cpp/src/detail/jit_lto/nvjitlink_checker.cpp | 11 +++++------ .../ivf_flat/ivf_flat_interleaved_scan_jit.cuh | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/src/detail/jit_lto/nvjitlink_checker.cpp b/cpp/src/detail/jit_lto/nvjitlink_checker.cpp index adea2853bf..6f9ae988db 100644 --- a/cpp/src/detail/jit_lto/nvjitlink_checker.cpp +++ 
b/cpp/src/detail/jit_lto/nvjitlink_checker.cpp @@ -14,15 +14,14 @@ void check_nvjitlink_result(nvJitLinkHandle handle, nvJitLinkResult result) { if (result != NVJITLINK_SUCCESS) { - RAFT_FAIL("nvJITLink failed with error %s", std::to_string(result).c_str()); - size_t log_size = 0; - result = nvJitLinkGetErrorLogSize(handle, &log_size); + std::string error_msg = "nvJITLink failed with error " + std::to_string(result); + size_t log_size = 0; + result = nvJitLinkGetErrorLogSize(handle, &log_size); if (result == NVJITLINK_SUCCESS && log_size > 0) { std::unique_ptr log{new char[log_size]}; result = nvJitLinkGetErrorLog(handle, log.get()); - if (result == NVJITLINK_SUCCESS) { - RAFT_FAIL("AlgorithmPlanner nvJITLink error log: %s", std::string(log.get()).c_str()); - } + if (result == NVJITLINK_SUCCESS) { error_msg += "\n" + std::string(log.get()); } } + RAFT_FAIL("AlgorithmPlanner nvJITLink error log: %s", error_msg.c_str()); } } diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index 85575c3c5f..be8652dd59 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -102,7 +102,7 @@ constexpr auto get_post_lambda_name() inline uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMemSize, - CUkernel func) + cudaKernel_t func) { int dev_id; RAFT_CUDA_TRY(cudaGetDevice(&dev_id)); From 17d34ae8ce732afb9ffda5221c0906dea2042b29 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 22 Jan 2026 23:21:36 +0000 Subject: [PATCH 071/158] remove too many underscores --- .../cuvs/detail/jit_lto/FragmentDatabase.h | 2 +- cpp/include/cuvs/detail/jit_lto/FragmentEntry.h | 4 ++-- .../cuvs/detail/jit_lto/MakeFragmentKey.h | 1 + cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 2 +- cpp/src/detail/jit_lto/FragmentDatabase.cu | 16 +++++++++------- cpp/src/detail/jit_lto/FragmentEntry.cu | 6 +++--- 
.../jit_lto_kernels/interleaved_scan_planner.hpp | 4 ++-- 7 files changed, 19 insertions(+), 16 deletions(-) diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h index b3699d7c6d..890796ba44 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h @@ -25,7 +25,7 @@ class FragmentDatabase { private: FragmentDatabase(); - bool make_cache_entry(std::string const& name, std::string const& params); + bool make_cache_entry(std::string const& key); friend FragmentDatabase& fragment_database(); diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h index ed0edd59b2..a376068425 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h @@ -13,7 +13,7 @@ #include struct FragmentEntry { - FragmentEntry(std::string const& params); + FragmentEntry(std::string const& key); bool operator==(const FragmentEntry& rhs) const { return compute_key == rhs.compute_key; } @@ -23,7 +23,7 @@ struct FragmentEntry { }; struct FatbinFragmentEntry final : FragmentEntry { - FatbinFragmentEntry(std::string const& params, unsigned char const* view, std::size_t size); + FatbinFragmentEntry(std::string const& key, unsigned char const* view, std::size_t size); virtual bool add_to(nvJitLinkHandle& handle) const; diff --git a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h index eea74b782a..21482d5234 100644 --- a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h +++ b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h @@ -26,5 +26,6 @@ std::string make_fragment_key() { std::string result; ((result += detail::type_as_string() + "_"), ...); + if (!result.empty()) { result.pop_back(); } return result; } diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 
c5d954ff79..693e2ee685 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -38,7 +38,7 @@ std::string AlgorithmPlanner::get_device_functions_key() const { std::string key = ""; for (const auto& device_function : this->device_functions) { - key += "_" + device_function; + key += device_function; } return key; } diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index 89fa415b83..ea43b987fb 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -10,10 +10,10 @@ FragmentDatabase::FragmentDatabase() {} -bool FragmentDatabase::make_cache_entry(std::string const& name, std::string const& params) +bool FragmentDatabase::make_cache_entry(std::string const& key) { - if (this->cache.count(name + "_" + params) == 0) { - this->cache[name + "_" + params] = std::unique_ptr{}; + if (this->cache.count(key) == 0) { + this->cache[key] = std::unique_ptr{}; return false; } return true; @@ -29,7 +29,7 @@ FragmentEntry* FragmentDatabase::get_fragment(std::string const& key) { auto& db = fragment_database(); auto val = db.cache.find(key); - RAFT_EXPECTS(val != db.cache.end(), "FragmentDatabase: Key not found"); + RAFT_EXPECTS(val != db.cache.end(), "FragmentDatabase: Key not found: %s", key.c_str()); return val->second.get(); } @@ -38,8 +38,10 @@ void registerFatbinFragment(std::string const& algo, unsigned char const* blob, std::size_t size) { - auto& planner = fragment_database(); - auto entry_exists = planner.make_cache_entry(algo, params); + auto& planner = fragment_database(); + std::string key = algo; + if (!params.empty()) { key += "_" + params; } + auto entry_exists = planner.make_cache_entry(key); if (entry_exists) { return; } - planner.cache[algo + "_" + params] = std::make_unique(params, blob, size); + planner.cache[key] = std::make_unique(key, blob, size); } diff --git a/cpp/src/detail/jit_lto/FragmentEntry.cu 
b/cpp/src/detail/jit_lto/FragmentEntry.cu index ecc94faf76..7f0f83ebb9 100644 --- a/cpp/src/detail/jit_lto/FragmentEntry.cu +++ b/cpp/src/detail/jit_lto/FragmentEntry.cu @@ -9,12 +9,12 @@ #include -FragmentEntry::FragmentEntry(std::string const& params) : compute_key(params) {} +FragmentEntry::FragmentEntry(std::string const& key) : compute_key(key) {} -FatbinFragmentEntry::FatbinFragmentEntry(std::string const& params, +FatbinFragmentEntry::FatbinFragmentEntry(std::string const& key, unsigned char const* view, std::size_t size) - : FragmentEntry(params), data_size(size), data_view(view) + : FragmentEntry(key), data_size(size), data_view(view) { } diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp index beab9e858b..1a8217524a 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -33,13 +33,13 @@ struct InterleavedScanPlanner : AlgorithmPlanner { void add_filter_device_function(std::string filter_name) { - auto key = filter_name + "_"; + auto key = filter_name; this->device_functions.push_back(key); } void add_post_lambda_device_function(std::string post_lambda_name) { - auto key = post_lambda_name + "_"; + auto key = post_lambda_name; this->device_functions.push_back(key); } }; From 45a51463cd359bd7ea049852b4e8d13ab38fc80c Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 26 Jan 2026 12:13:46 -0600 Subject: [PATCH 072/158] FEA Add initial commit of prototype/pseudo-code for proposed UDF APIs and README --- .../cuvs/udf/README_UDF_API_PROPOSAL.md | 226 ++++++++++++++++ cpp/include/cuvs/udf/compiler.hpp | 101 ++++++++ cpp/include/cuvs/udf/metric_interface.cuh | 61 +++++ cpp/include/cuvs/udf/metric_macro.hpp | 73 ++++++ cpp/include/cuvs/udf/metric_source.hpp | 72 ++++++ cpp/include/cuvs/udf/packed_helpers.cuh | 80 ++++++ 
cpp/include/cuvs/udf/point.cuh | 220 ++++++++++++++++ cpp/src/udf/compiler.cpp | 242 ++++++++++++++++++ examples/cpp/src/udf_chebyshev_metric.cu | 93 +++++++ examples/cpp/src/udf_int8_metric.cu | 117 +++++++++ examples/cpp/src/udf_simple_metric.cu | 98 +++++++ examples/cpp/src/udf_weighted_metric.cu | 133 ++++++++++ 12 files changed, 1516 insertions(+) create mode 100644 cpp/include/cuvs/udf/README_UDF_API_PROPOSAL.md create mode 100644 cpp/include/cuvs/udf/compiler.hpp create mode 100644 cpp/include/cuvs/udf/metric_interface.cuh create mode 100644 cpp/include/cuvs/udf/metric_macro.hpp create mode 100644 cpp/include/cuvs/udf/metric_source.hpp create mode 100644 cpp/include/cuvs/udf/packed_helpers.cuh create mode 100644 cpp/include/cuvs/udf/point.cuh create mode 100644 cpp/src/udf/compiler.cpp create mode 100644 examples/cpp/src/udf_chebyshev_metric.cu create mode 100644 examples/cpp/src/udf_int8_metric.cu create mode 100644 examples/cpp/src/udf_simple_metric.cu create mode 100644 examples/cpp/src/udf_weighted_metric.cu diff --git a/cpp/include/cuvs/udf/README_UDF_API_PROPOSAL.md b/cpp/include/cuvs/udf/README_UDF_API_PROPOSAL.md new file mode 100644 index 0000000000..74d9b65385 --- /dev/null +++ b/cpp/include/cuvs/udf/README_UDF_API_PROPOSAL.md @@ -0,0 +1,226 @@ +# cuVS UDF (User-Defined Function) API Proposal + +This folder contains a proposed UDF API for cuVS custom distance metrics. + +## Design Overview + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ User writes (via macro): │ +│ │ +│ CUVS_METRIC(my_l2, { │ +│ acc += cuvs::udf::squared_diff(x, y); // Just works! 
│ +│ }) │ +│ │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ x and y are point which provides: │ +│ │ +│ x.raw() - Raw packed storage (for power users) │ +│ x[i] - Unpacked element access │ +│ x.size() - Number of elements (4 for packed int8, 1 for float) │ +│ x.is_packed() - Whether data is packed (constexpr) │ +│ │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ Helper functions deduce Veclen automatically: │ +│ │ +│ cuvs::udf::squared_diff(x, y) - (x-y)² optimal for ALL types │ +│ cuvs::udf::abs_diff(x, y) - |x-y| optimal for ALL types │ +│ cuvs::udf::dot_product(x, y) - x·y optimal for ALL types │ +│ │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ At runtime, cuVS wraps raw values and calls your metric: │ +│ │ +│ point_t x{x_raw}, y{y_raw}; │ +│ my_l2{}(acc, x, y); │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## Files + +``` +cpp/include/cuvs/udf/ +├── README_UDF_API_PROPOSAL.md # This file +├── point.cuh # point wrapper + helpers +├── metric_interface.cuh # Base interface (compile-time safety) +├── metric_macro.hpp # CUVS_METRIC macro +├── metric_source.hpp # metric_source struct +├── compiler.hpp # Internal JIT compiler interface +└── packed_helpers.cuh # [DEPRECATED] Legacy helpers + +cpp/src/udf/ +└── compiler.cpp # JIT compiler implementation + +examples/cpp/src/ +├── udf_simple_metric.cu # Basic example +├── udf_chebyshev_metric.cu # L∞ distance +├── udf_int8_metric.cu # int8/uint8 - now easy! +└── udf_weighted_metric.cu # Custom headers +``` + +## Quick Start + +### 1. Define Your Metric - The Easy Way + +```cpp +#include + +// L2 distance - works for float, half, int8, uint8! +CUVS_METRIC(my_l2, { + acc += cuvs::udf::squared_diff(x, y); +}) + +// L1 distance - works for all types! +CUVS_METRIC(my_l1, { + acc += cuvs::udf::abs_diff(x, y); +}) + +// Inner product - works for all types! 
+CUVS_METRIC(my_dot, { + acc += cuvs::udf::dot_product(x, y); +}) +``` + +### 2. Custom Logic with Element Access + +```cpp +// Chebyshev (L∞) distance - max absolute difference +CUVS_METRIC(chebyshev, { + for (int i = 0; i < x.size(); ++i) { + auto diff = (x[i] > y[i]) ? (x[i] - y[i]) : (y[i] - x[i]); + if (diff > acc) acc = static_cast(diff); + } +}) + +// Per-dimension weighted L2 +CUVS_METRIC(weighted_l2, { + for (int i = 0; i < x.size(); ++i) { + auto diff = x[i] - y[i]; + auto weight = AccT{1} + AccT{i} * AccT{0.1}; // Custom weights + acc += weight * diff * diff; + } +}) +``` + +### 3. Use in Search + +```cpp +#include + +int main() { + auto index = cuvs::neighbors::ivf_flat::deserialize(res, "index.bin"); + + cuvs::neighbors::ivf_flat::search_params params; + params.udf.metric = my_l2_udf(); // Auto-generated function! + + cuvs::neighbors::ivf_flat::search(res, params, index, queries, neighbors, distances); +} +``` + +## The `point` Wrapper + +The key innovation is wrapping raw values in `point`: + +```cpp +template +struct point { + storage_type data_; + + // Raw access for power users + __device__ storage_type raw() const; + + // Element access - handles unpacking automatically + __device__ T operator[](int i) const; + + // Compile-time queries + static constexpr int size(); // 4 for packed int8, 1 for float + static constexpr bool is_packed(); // true for int8/uint8 with Veclen > 1 +}; +``` + +### Benefits + +1. **Helpers deduce Veclen automatically** - no template args needed! +2. **Element access `x[i]`** - unpacks automatically for int8/uint8 +3. **Type queries** - `is_packed()`, `size()` for conditional logic +4. **Raw access** - `x.raw()` for power users who need intrinsics + +## Helper Functions + +All helpers deduce `Veclen` from the `point` type - no manual template args! 
+ +| Helper | Description | int8/uint8 Implementation | +|--------|-------------|---------------------------| +| `squared_diff(x, y)` | (x-y)² | `__vabsdiffs4`/`__vabsdiffu4` + `__dp4a` | +| `abs_diff(x, y)` | \|x-y\| | `__vabsdiffs4`/`__vabsdiffu4` + byte sum | +| `dot_product(x, y)` | x·y | `__dp4a` | +| `product(x, y)` | element-wise × | `__dp4a` | +| `sum(x, y)` | element-wise + | unpacked loop | +| `max_elem(x, y)` | max element | unpacked loop | + +## Supported Types + +| Data Type | Accumulator | `x.size()` | Complexity | +|-----------|-------------|------------|------------| +| `float` | `float` | 1 | ⭐ Easy | +| `__half` | `__half` | 1 | ⭐ Easy | +| `int8_t` | `int32_t` | 4 (packed) | ⭐ Easy with helpers! | +| `uint8_t` | `uint32_t` | 4 (packed) | ⭐ Easy with helpers! | + +### int8/uint8 - Now Easy! + +**Before** (manual intrinsics): +```cpp +CUVS_METRIC(old_way, { + if constexpr (std::is_same_v && Veclen > 1) { + auto diff = __vabsdiffs4(x, y); // Must know this! + acc = raft::dp4a(diff, diff, acc); // And this! + } else { + auto diff = x - y; + acc += diff * diff; + } +}) +``` + +**After** (with point wrapper): +```cpp +CUVS_METRIC(new_way, { + acc += cuvs::udf::squared_diff(x, y); // Just works!
+}) +``` + +## Power User Mode + +For maximum control, you can still access raw storage and use intrinsics: + +```cpp +CUVS_METRIC(power_user, { + if constexpr (decltype(x)::is_packed()) { + // Use SIMD intrinsics directly + acc = __dp4a(x.raw(), y.raw(), acc); + } else { + acc += x.raw() * y.raw(); + } +}) +``` + +## Error Handling + +```cpp +try { + search(res, params, index, queries, neighbors, distances); +} catch (const cuvs::udf::compilation_error& e) { + std::cerr << "UDF compilation failed:\n" << e.what() << std::endl; +} +``` + +## Summary + +| Feature | Benefit | +|---------|---------| +| **point wrapper** | Clean API, no raw AccT confusion | +| **Helper functions** | No intrinsics knowledge needed | +| **Auto Veclen deduction** | No template args in helpers | +| **Element access `x[i]`** | Custom logic without intrinsics | +| **Compile-time queries** | `is_packed()`, `size()` for branching | +| **Raw access `x.raw()`** | Power users can still use intrinsics | diff --git a/cpp/include/cuvs/udf/compiler.hpp b/cpp/include/cuvs/udf/compiler.hpp new file mode 100644 index 0000000000..7f979772b1 --- /dev/null +++ b/cpp/include/cuvs/udf/compiler.hpp @@ -0,0 +1,101 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "metric_source.hpp" + +namespace cuvs::udf::detail { + +/** + * @brief Compiled UDF fragment (LTO-IR). + */ +struct compiled_fragment { + std::vector lto_ir; + size_t size() const { return lto_ir.size(); } + const char* data() const { return lto_ir.data(); } +}; + +/** + * @brief Cache key for compiled UDFs. 
+ */ +struct cache_key { + std::string source_hash; + std::string struct_name; + int veclen; + std::string data_type; + std::string acc_type; + int compute_capability; + + bool operator==(const cache_key& other) const + { + return source_hash == other.source_hash && struct_name == other.struct_name && + veclen == other.veclen && data_type == other.data_type && acc_type == other.acc_type && + compute_capability == other.compute_capability; + } +}; + +struct cache_key_hash { + size_t operator()(const cache_key& k) const; +}; + +/** + * @brief Thread-safe cache for compiled UDF fragments. + */ +class udf_cache { + public: + static udf_cache& instance(); + + std::shared_ptr get(const cache_key& key); + void put(const cache_key& key, std::shared_ptr fragment); + void clear(); + + private: + udf_cache() = default; + std::unordered_map, cache_key_hash> cache_; + std::mutex mutex_; +}; + +/** + * @brief Build the full source code for JIT compilation. + * + * Takes the user's struct source and appends: + * 1. Standard includes + * 2. The compute_dist wrapper function + * 3. The explicit instantiation for the given Veclen/T/AccT + * + * @param udf User's metric source (struct only) + * @param veclen Vector length from index + * @param data_type Data type string (e.g., "float") + * @param acc_type Accumulator type string (e.g., "float") + * @return Complete source ready for NVRTC + */ +std::string build_full_source(const metric_source& udf, + int veclen, + const std::string& data_type, + const std::string& acc_type); + +/** + * @brief Compile a UDF metric source to LTO-IR. 
+ * + * @param udf The user's metric source + * @param veclen Vector length from index + * @param data_type Data type string + * @param acc_type Accumulator type string + * @return Compiled fragment (LTO-IR) + * @throws compilation_error if NVRTC compilation fails + */ +std::shared_ptr compile_metric(const metric_source& udf, + int veclen, + const std::string& data_type, + const std::string& acc_type); + +} // namespace cuvs::udf::detail diff --git a/cpp/include/cuvs/udf/metric_interface.cuh b/cpp/include/cuvs/udf/metric_interface.cuh new file mode 100644 index 0000000000..4c36116e5f --- /dev/null +++ b/cpp/include/cuvs/udf/metric_interface.cuh @@ -0,0 +1,61 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "point.cuh" + +namespace cuvs::udf { + +/** + * @brief Base interface for custom distance metrics. + * + * Inherit from this interface to get compile-time enforcement of the + * correct operator() signature via the `override` keyword. + * + * If you forget to implement operator() or use the wrong signature, + * you'll get a clear compile error: "does not override any member function" + * + * @tparam T Data type (float, __half, int8_t, uint8_t) + * @tparam AccT Accumulator type (float, __half, int32_t, uint32_t) + * @tparam Veclen Vector length (handled by cuVS internally) + * + * @note x and y are point which provides: + * - .raw() : packed storage for power users + * - operator[] : unpacked element access + * - ::veclen : compile-time Veclen + * - ::is_packed() : whether data is packed + */ +template +struct metric_interface { + using point_type = point; + + /** + * @brief Compute distance contribution for one element pair. 
+ * + * @param[in,out] acc Accumulated distance value + * @param[in] x Query vector element (point wrapper) + * @param[in] y Database vector element (point wrapper) + * + * Example: + * // Simple - use helpers (recommended): + * acc += squared_diff(x, y); + * + * // Array access for custom logic: + * for (int i = 0; i < x.size(); ++i) { + * acc += x[i] * y[i]; + * } + * + * // Power user - raw access: + * if constexpr (point_type::is_packed()) { + * acc = __dp4a(x.raw(), y.raw(), acc); + * } + */ + virtual __device__ void operator()(AccT& acc, point_type x, point_type y) = 0; + + virtual __device__ ~metric_interface() = default; +}; + +} // namespace cuvs::udf diff --git a/cpp/include/cuvs/udf/metric_macro.hpp b/cpp/include/cuvs/udf/metric_macro.hpp new file mode 100644 index 0000000000..69ad8157ed --- /dev/null +++ b/cpp/include/cuvs/udf/metric_macro.hpp @@ -0,0 +1,73 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "metric_interface.cuh" +#include "metric_source.hpp" +#include "point.cuh" + +/** + * @brief Define a custom distance metric with compile-time validation. + * + * This macro creates: + * 1. A struct that inherits from metric_interface (compile-time validation) + * 2. 
A function NAME_udf() that returns a metric_source for JIT compilation + * + * @param NAME The name of your metric (becomes struct name and function prefix) + * @param BODY The body of operator()(AccT& acc, point_type x, point_type y) + * + * Available in BODY: + * acc - Accumulated distance (AccT&, modify in-place) + * x, y - Vector elements (point) + * T - Data type (float, __half, int8_t, uint8_t) + * AccT - Accumulator type + * Veclen - Vector length (compile-time constant) + * + * x and y provide: + * x.raw() - Raw packed storage (for power users) + * x[i] - Unpacked element access + * x.size() - Number of elements (4 for packed int8, 1 for float) + * x.is_packed() - Whether data is packed (constexpr) + * + * Helper functions (Veclen deduced automatically!): + * cuvs::udf::squared_diff(x, y) - (x-y)² optimized for all types + * cuvs::udf::abs_diff(x, y) - |x-y| optimized for all types + * cuvs::udf::dot_product(x, y) - x·y optimized for all types + * cuvs::udf::product(x, y) - element-wise product + * + * Example: + * CUVS_METRIC(my_l2, { + * acc += cuvs::udf::squared_diff(x, y); // Just works for all types! + * }) + * + * CUVS_METRIC(my_chebyshev, { + * for (int i = 0; i < x.size(); ++i) { + * auto diff = (x[i] > y[i]) ? 
(x[i] - y[i]) : (y[i] - x[i]); + * if (diff > acc) acc = diff; + * } + * }) + */ +#define CUVS_METRIC(NAME, BODY) \ + template \ + struct NAME : cuvs::udf::metric_interface { \ + using point_type = cuvs::udf::point; \ + __device__ void operator()(AccT& acc, point_type x, point_type y) override { BODY } \ + }; \ + \ + inline cuvs::udf::metric_source NAME##_udf() \ + { \ + return cuvs::udf::metric_source{ \ + .source = R"( \ +template \ +struct )" #NAME R"( : cuvs::udf::metric_interface { \ + using point_type = cuvs::udf::point; \ + __device__ void operator()(AccT& acc, point_type x, point_type y) override \ +)" #BODY R"( \ +}; \ +)", \ + .struct_name = #NAME, \ + .headers = {}}; \ + } diff --git a/cpp/include/cuvs/udf/metric_source.hpp b/cpp/include/cuvs/udf/metric_source.hpp new file mode 100644 index 0000000000..4344f8e923 --- /dev/null +++ b/cpp/include/cuvs/udf/metric_source.hpp @@ -0,0 +1,72 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include + +namespace cuvs::udf { + +/** + * @brief Source definition for a custom metric. + * + * Contains the struct source code and metadata needed for JIT compilation. + * Typically created via the CUVS_METRIC macro (see metric_macro.hpp). + */ +struct metric_source { + /** + * @brief CUDA source code containing the metric struct. + * + * Should define a template struct: + * template + * struct your_name { + * __device__ void operator()(AccT& acc, point_type x, point_type y) { ... } + * }; + * + * Note: Do NOT include the compute_dist wrapper or explicit instantiation. + * cuVS appends those automatically based on index properties. + */ + std::string source; + + /** + * @brief Name of the metric struct (without template parameters). + * + * cuVS uses this to generate: + * your_name{}(acc, x, y); + */ + std::string struct_name; + + /** + * @brief Optional headers the metric depends on.
+ * + * Map of header name -> header content. + * Passed to NVRTC's virtual filesystem. + */ + std::unordered_map headers; +}; + +/** + * @brief UDF configuration for search parameters. + */ +struct udf_config { + std::optional metric; + + // Future extensions: + // std::optional sample_filter; + // std::optional post_process; +}; + +/** + * @brief Exception thrown when UDF JIT compilation fails. + */ +class compilation_error : public std::runtime_error { + public: + explicit compilation_error(const std::string& msg) : std::runtime_error(msg) {} +}; + +} // namespace cuvs::udf diff --git a/cpp/include/cuvs/udf/packed_helpers.cuh b/cpp/include/cuvs/udf/packed_helpers.cuh new file mode 100644 index 0000000000..1891343239 --- /dev/null +++ b/cpp/include/cuvs/udf/packed_helpers.cuh @@ -0,0 +1,80 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +// ============================================================================= +// DEPRECATED: Use point.cuh helpers instead! +// +// Old API: +// acc += cuvs::udf::packed_sq_diff_u8(x, y); +// +// New API (recommended): +// acc += cuvs::udf::squared_diff(x, y); // Works for ALL types! +// +// The new helpers take point and deduce types automatically. +// ============================================================================= + +#include "point.cuh" + +namespace cuvs::udf { + +// Legacy helpers for raw packed values (use point-based helpers instead!) + +/** + * @brief [DEPRECATED] Use squared_diff(x, y) with point wrapper instead. + */ +__device__ __forceinline__ uint32_t packed_sq_diff_u8(uint32_t x, uint32_t y) +{ + auto diff = __vabsdiffu4(x, y); + return __dp4a(diff, diff, 0u); +} + +/** + * @brief [DEPRECATED] Use squared_diff(x, y) with point wrapper instead. 
+ */ +__device__ __forceinline__ int32_t packed_sq_diff_i8(int32_t x, int32_t y) +{ + auto diff = __vabsdiffs4(x, y); + return __dp4a(diff, diff, 0); +} + +/** + * @brief [DEPRECATED] Use abs_diff(x, y) with point wrapper instead. + */ +__device__ __forceinline__ uint32_t packed_l1_u8(uint32_t x, uint32_t y) +{ + auto diff = __vabsdiffu4(x, y); + return (diff & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + ((diff >> 24) & 0xFF); +} + +/** + * @brief [DEPRECATED] Use abs_diff(x, y) with point wrapper instead. + */ +__device__ __forceinline__ int32_t packed_l1_i8(int32_t x, int32_t y) +{ + auto diff = __vabsdiffs4(x, y); + uint32_t udiff = static_cast(diff); + return static_cast((udiff & 0xFF) + ((udiff >> 8) & 0xFF) + ((udiff >> 16) & 0xFF) + + ((udiff >> 24) & 0xFF)); +} + +/** + * @brief [DEPRECATED] Use dot_product(x, y) with point wrapper instead. + */ +__device__ __forceinline__ uint32_t packed_dot_u8(uint32_t x, uint32_t y) +{ + return __dp4a(x, y, 0u); +} + +/** + * @brief [DEPRECATED] Use dot_product(x, y) with point wrapper instead. + */ +__device__ __forceinline__ int32_t packed_dot_i8(int32_t x, int32_t y) +{ + return __dp4a(x, y, 0); +} + +} // namespace cuvs::udf diff --git a/cpp/include/cuvs/udf/point.cuh b/cpp/include/cuvs/udf/point.cuh new file mode 100644 index 0000000000..903205d494 --- /dev/null +++ b/cpp/include/cuvs/udf/point.cuh @@ -0,0 +1,220 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include + +namespace cuvs::udf { + +/** + * @brief Wrapper for vector elements that provides both packed and unpacked access. 
+ * + * For float/half: trivial wrapper around scalar values + * For int8/uint8 with Veclen > 1: wraps packed bytes in a 32-bit word + * + * @tparam T Data type (float, __half, int8_t, uint8_t) + * @tparam AccT Storage/accumulator type (float, __half, int32_t, uint32_t) + * @tparam Veclen Vector length (1, 2, 4, 8, 16) + * + * Usage: + * // Helpers deduce Veclen automatically: + * acc += cuvs::udf::squared_diff(x, y); // No template args! + * + * // Array access for custom logic (slower but flexible): + * for (int i = 0; i < x.size(); ++i) { + * acc += x[i] * y[i]; + * } + * + * // Query packing: + * if constexpr (decltype(x)::is_packed()) { ... } + */ +template +struct point { + using element_type = T; + using storage_type = AccT; + static constexpr int veclen = Veclen; + + storage_type data_; + + // ============================================================ + // Constructors + // ============================================================ + + __device__ __host__ point() = default; + __device__ __host__ explicit point(storage_type d) : data_(d) {} + + // ============================================================ + // Raw access (for power users who need intrinsics) + // ============================================================ + + __device__ __forceinline__ storage_type raw() const { return data_; } + __device__ __forceinline__ storage_type& raw() { return data_; } + + // ============================================================ + // Compile-time queries + // ============================================================ + + __device__ __host__ static constexpr int size() + { + // For packed int8/uint8: 4 elements per storage word + if constexpr ((std::is_same_v || std::is_same_v) && Veclen > 1) { + return 4; + } else { + return 1; + } + } + + __device__ __host__ static constexpr bool is_packed() + { + return (std::is_same_v || std::is_same_v) && Veclen > 1; + } + + // ============================================================ + // Element access 
(unpacks for int8/uint8) + // ============================================================ + + __device__ __forceinline__ T operator[](int i) const + { + if constexpr (std::is_same_v && Veclen > 1) { + // Extract signed byte i from packed int32_t + return static_cast((data_ >> (i * 8)) & 0xFF); + } else if constexpr (std::is_same_v && Veclen > 1) { + // Extract unsigned byte i from packed uint32_t + return static_cast((data_ >> (i * 8)) & 0xFF); + } else { + // Scalar types: only one element + (void)i; // Unused + return static_cast(data_); + } + } +}; + +// ============================================================ +// Helper Operations - Deduce Veclen from point type! +// ============================================================ + +/** + * @brief Squared difference: (x - y)² + * + * Optimized for packed int8/uint8, falls back to scalar for float/half. + */ +template +__device__ __forceinline__ AccT squared_diff(point x, point y) +{ + if constexpr (std::is_same_v && V > 1) { + // SIMD: 4 packed unsigned bytes + auto diff = __vabsdiffu4(x.raw(), y.raw()); + return __dp4a(diff, diff, AccT{0}); + } else if constexpr (std::is_same_v && V > 1) { + // SIMD: 4 packed signed bytes + auto diff = __vabsdiffs4(x.raw(), y.raw()); + return __dp4a(diff, diff, static_cast(0)); + } else { + // Scalar: float, half, or byte with Veclen==1 + auto diff = x.raw() - y.raw(); + return diff * diff; + } +} + +/** + * @brief Absolute difference: |x - y| + * + * For packed types, returns sum of absolute differences. 
+ */ +template +__device__ __forceinline__ AccT abs_diff(point x, point y) +{ + if constexpr (std::is_same_v && V > 1) { + // SIMD: sum of 4 unsigned absolute differences + auto diff = __vabsdiffu4(x.raw(), y.raw()); + // Sum the 4 bytes + return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + + ((diff >> 24) & 0xFF); + } else if constexpr (std::is_same_v && V > 1) { + // SIMD: sum of 4 signed absolute differences + auto diff = __vabsdiffs4(x.raw(), y.raw()); + return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + + ((diff >> 24) & 0xFF); + } else { + // Scalar + auto a = x.raw(); + auto b = y.raw(); + return (a > b) ? (a - b) : (b - a); + } +} + +/** + * @brief Dot product: x · y + * + * For packed types, computes sum of element-wise products. + */ +template +__device__ __forceinline__ AccT dot_product(point x, point y) +{ + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + // SIMD: dp4a computes dot product of 4 packed bytes + return __dp4a(x.raw(), y.raw(), AccT{0}); + } else { + // Scalar + return x.raw() * y.raw(); + } +} + +/** + * @brief Element-wise product: x * y + * + * For packed types, returns sum of element-wise products (same as dot_product). + */ +template +__device__ __forceinline__ AccT product(point x, point y) +{ + return dot_product(x, y); +} + +/** + * @brief Element-wise sum: x + y + */ +template +__device__ __forceinline__ AccT sum(point x, point y) +{ + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + // Sum all unpacked elements + AccT result = 0; + for (int i = 0; i < x.size(); ++i) { + result += static_cast(x[i]) + static_cast(y[i]); + } + return result; + } else { + return x.raw() + y.raw(); + } +} + +/** + * @brief Maximum element: max(x, y) + * + * For packed types, returns max across all element pairs. 
+ */ +template +__device__ __forceinline__ AccT max_elem(point x, point y) +{ + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + AccT result = 0; + for (int i = 0; i < x.size(); ++i) { + auto xi = static_cast(x[i]); + auto yi = static_cast(y[i]); + auto val = (xi > yi) ? xi : yi; + if (val > result) result = val; + } + return result; + } else { + auto a = x.raw(); + auto b = y.raw(); + return (a > b) ? a : b; + } +} + +} // namespace cuvs::udf diff --git a/cpp/src/udf/compiler.cpp b/cpp/src/udf/compiler.cpp new file mode 100644 index 0000000000..e57a03077c --- /dev/null +++ b/cpp/src/udf/compiler.cpp @@ -0,0 +1,242 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file compiler.cpp + * @brief Implementation of UDF JIT compiler using NVRTC + * + * This file shows how cuVS implements JIT compilation of user-defined metrics. + * Key responsibility: append the compute_dist wrapper and explicit instantiation + * to the user's struct definition. 
+ */ + +#include + +#include +#include +#include + +#include +#include +#include + +namespace cuvs::udf::detail { + +// ============================================================ +// Cache Implementation +// ============================================================ + +size_t cache_key_hash::operator()(const cache_key& k) const +{ + size_t h = 0; + auto hash_combine = [&h](const auto& v) { + h ^= std::hash>{}(v) + 0x9e3779b9 + (h << 6) + (h >> 2); + }; + + hash_combine(k.source_hash); + hash_combine(k.struct_name); + hash_combine(k.veclen); + hash_combine(k.data_type); + hash_combine(k.acc_type); + hash_combine(k.compute_capability); + + return h; +} + +udf_cache& udf_cache::instance() +{ + static udf_cache cache; + return cache; +} + +std::shared_ptr udf_cache::get(const cache_key& key) +{ + std::lock_guard lock(mutex_); + auto it = cache_.find(key); + if (it != cache_.end()) { return it->second; } + return nullptr; +} + +void udf_cache::put(const cache_key& key, std::shared_ptr fragment) +{ + std::lock_guard lock(mutex_); + cache_[key] = std::move(fragment); +} + +void udf_cache::clear() +{ + std::lock_guard lock(mutex_); + cache_.clear(); +} + +// ============================================================ +// Source Building - THE KEY FUNCTION +// ============================================================ + +std::string build_full_source(const metric_source& udf, + int veclen, + const std::string& data_type, + const std::string& acc_type) +{ + std::stringstream ss; + + // 1. Standard includes + ss << "#include \n"; + ss << "#include \n"; + ss << "#include \n\n"; + + // 2. Include the point wrapper and metric interface + ss << "#include \n"; + ss << "#include \n\n"; + + // 3. Open namespace + ss << "namespace cuvs::neighbors::ivf_flat::detail {\n\n"; + + // 4. User's struct definition (from metric_source.source) + // This is ONLY the struct - no wrapper, no instantiation + ss << "// User-defined metric struct\n"; + ss << udf.source << "\n\n"; + + // 5. 
cuVS adds the compute_dist wrapper function + // This calls the user's struct with point-wrapped arguments + ss << "// cuVS-generated wrapper function\n"; + ss << "template \n"; + ss << "__device__ void compute_dist(AccT& acc, AccT x_raw, AccT y_raw) {\n"; + ss << " // Wrap raw values in point\n"; + ss << " using point_t = cuvs::udf::point;\n"; + ss << " point_t x{x_raw};\n"; + ss << " point_t y{y_raw};\n"; + ss << " " << udf.struct_name << "{}(acc, x, y);\n"; + ss << "}\n\n"; + + // 6. cuVS adds the explicit instantiation + // Based on index.veclen() and index.data_type() + ss << "// cuVS-generated explicit instantiation\n"; + ss << "template __device__ void compute_dist<" << veclen << ", " << data_type << ", " << acc_type + << ">(" << acc_type << "&, " << acc_type << ", " << acc_type << ");\n\n"; + + // 7. Close namespace + ss << "} // namespace cuvs::neighbors::ivf_flat::detail\n"; + + return ss.str(); +} + +// ============================================================ +// Hash helper +// ============================================================ + +static std::string compute_source_hash(const std::string& source) +{ + std::hash hasher; + return std::to_string(hasher(source)); +} + +// ============================================================ +// NVRTC Error Checking +// ============================================================ + +static void check_nvrtc(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) { + std::stringstream ss; + ss << msg << ": " << nvrtcGetErrorString(result); + throw compilation_error(ss.str()); + } +} + +// ============================================================ +// Main Compilation Function +// ============================================================ + +std::shared_ptr compile_metric(const metric_source& udf, + int veclen, + const std::string& data_type, + const std::string& acc_type) +{ + // 1. 
Get device compute capability + int device; + cudaGetDevice(&device); + cudaDeviceProp props; + cudaGetDeviceProperties(&props, device); + int cc = props.major * 10 + props.minor; + + // 2. Check cache first + cache_key key{.source_hash = compute_source_hash(udf.source), + .struct_name = udf.struct_name, + .veclen = veclen, + .data_type = data_type, + .acc_type = acc_type, + .compute_capability = cc}; + + auto& cache = udf_cache::instance(); + if (auto cached = cache.get(key)) { return cached; } + + // 3. Build full source (user struct + wrapper + instantiation) + std::string full_source = build_full_source(udf, veclen, data_type, acc_type); + + // 4. Prepare headers for NVRTC (include point.cuh and metric_interface.cuh) + std::vector header_names; + std::vector header_contents; + + for (const auto& [name, content] : udf.headers) { + header_names.push_back(name.c_str()); + header_contents.push_back(content.c_str()); + } + + // 5. Create NVRTC program + nvrtcProgram prog; + check_nvrtc(nvrtcCreateProgram(&prog, + full_source.c_str(), + "udf_metric.cu", + static_cast(header_names.size()), + header_contents.data(), + header_names.data()), + "Failed to create NVRTC program"); + + // 6. Compile options for LTO + std::string arch_opt = "--gpu-architecture=compute_" + std::to_string(cc); + + const char* opts[] = { + arch_opt.c_str(), + "-dlto", // Generate LTO-IR + "--relocatable-device-code=true", + "-std=c++17", + "-default-device", + }; + + nvrtcResult compile_result = nvrtcCompileProgram(prog, 5, opts); + + // 7. Get compilation log + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + + std::string log; + if (log_size > 1) { + log.resize(log_size); + nvrtcGetProgramLog(prog, log.data()); + } + + if (compile_result != NVRTC_SUCCESS) { + nvrtcDestroyProgram(&prog); + throw compilation_error("UDF compilation failed:\n" + log); + } + + // 8. 
Get LTO-IR + size_t lto_size; + check_nvrtc(nvrtcGetLTOIRSize(prog, <o_size), "Failed to get LTO-IR size"); + + auto fragment = std::make_shared(); + fragment->lto_ir.resize(lto_size); + check_nvrtc(nvrtcGetLTOIR(prog, fragment->lto_ir.data()), "Failed to get LTO-IR"); + + nvrtcDestroyProgram(&prog); + + // 9. Cache and return + cache.put(key, fragment); + + return fragment; +} + +} // namespace cuvs::udf::detail diff --git a/examples/cpp/src/udf_chebyshev_metric.cu b/examples/cpp/src/udf_chebyshev_metric.cu new file mode 100644 index 0000000000..59205536d7 --- /dev/null +++ b/examples/cpp/src/udf_chebyshev_metric.cu @@ -0,0 +1,93 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file udf_chebyshev_metric.cu + * @brief Example: Chebyshev (L∞) distance metric + * + * Chebyshev distance = max absolute difference across dimensions: + * d(x, y) = max_i |x_i - y_i| + * + * This example shows how to use element access for custom reduction logic. + */ + +#include +#include +#include + +#include + +// ============================================================ +// Chebyshev (L∞) Distance +// ============================================================ +// +// Unlike L2 (sum of squares) or L1 (sum of abs), Chebyshev +// tracks the MAXIMUM absolute difference seen so far. +// +// Uses element access x[i], y[i] for custom reduction. + +CUVS_METRIC(chebyshev_distance, { + for (int i = 0; i < x.size(); ++i) { + auto xi = x[i]; + auto yi = y[i]; + auto diff = (xi > yi) ? 
(xi - yi) : (yi - xi); + if (diff > acc) { acc = static_cast(diff); } + } +}) + +// ============================================================ +// Weighted L1 Distance - using helper +// ============================================================ + +CUVS_METRIC(weighted_l1, { + acc += cuvs::udf::abs_diff(x, y) * AccT{2.5}; // Custom weight +}) + +// ============================================================ +// Squared L2 (Euclidean) Distance - using helper +// ============================================================ + +CUVS_METRIC(squared_l2, { acc += cuvs::udf::squared_diff(x, y); }) + +// ============================================================ +// Minkowski Distance (p=3) - using element access +// ============================================================ + +CUVS_METRIC(minkowski_p3, { + for (int i = 0; i < x.size(); ++i) { + auto xi = x[i]; + auto yi = y[i]; + auto diff = (xi > yi) ? (xi - yi) : (yi - xi); + acc += diff * diff * diff; // |x-y|³ + } +}) + +int main() +{ + std::cout << "=== cuVS UDF Distance Metrics ===\n\n"; + + std::cout << "Defined metrics:\n"; + std::cout << " 1. chebyshev_distance - L∞ norm (max absolute diff)\n"; + std::cout << " 2. weighted_l1 - Weighted L1 distance\n"; + std::cout << " 3. squared_l2 - Standard squared Euclidean\n"; + std::cout << " 4. minkowski_p3 - Minkowski with p=3\n\n"; + + std::cout << "Usage:\n"; + std::cout << " params.udf.metric = chebyshev_distance_udf();\n"; + std::cout << " params.udf.metric = weighted_l1_udf();\n"; + std::cout << " params.udf.metric = squared_l2_udf();\n"; + std::cout << " params.udf.metric = minkowski_p3_udf();\n\n"; + + std::cout << "Two approaches for custom metrics:\n"; + std::cout << " 1. Use helpers: acc += cuvs::udf::squared_diff(x, y);\n"; + std::cout << " 2. 
Use element access: for (int i = 0; i < x.size(); ++i) {...}\n\n"; + + std::cout << "Use cases for Chebyshev distance:\n"; + std::cout << " - Image similarity (max pixel deviation)\n"; + std::cout << " - Quality control (worst-case tolerance)\n"; + std::cout << " - Game AI (king's movement on chessboard)\n"; + + return 0; +} diff --git a/examples/cpp/src/udf_int8_metric.cu b/examples/cpp/src/udf_int8_metric.cu new file mode 100644 index 0000000000..e698989ac7 --- /dev/null +++ b/examples/cpp/src/udf_int8_metric.cu @@ -0,0 +1,117 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file udf_int8_metric.cu + * @brief Example: int8/uint8 metrics - now EASY with point wrapper! + * + * The point wrapper makes int8/uint8 metrics trivial. + * No more manual intrinsics or if constexpr branches! + */ + +#include +#include +#include + +#include + +// ============================================================ +// Universal L2 Distance - ONE LINE! +// ============================================================ +// +// The helper squared_diff() handles ALL types optimally: +// - float/half: simple scalar math +// - int8/uint8: SIMD intrinsics (__vabsdiffs4, __dp4a) +// +// You don't need to know about packed types or intrinsics! + +CUVS_METRIC(universal_l2, { + acc += cuvs::udf::squared_diff(x, y); // Just works for everything! +}) + +// ============================================================ +// Universal L1 Distance - ONE LINE! +// ============================================================ + +CUVS_METRIC(universal_l1, { + acc += cuvs::udf::abs_diff(x, y); // Just works for everything! +}) + +// ============================================================ +// Universal Dot Product - ONE LINE! +// ============================================================ + +CUVS_METRIC(universal_dot, { + acc += cuvs::udf::dot_product(x, y); // Just works for everything! 
+}) + +// ============================================================ +// Custom logic using element access +// ============================================================ +// +// For custom logic, use x[i] and y[i] to access individual elements. +// The point wrapper handles unpacking automatically. + +CUVS_METRIC(custom_weighted_l2, { + // Access individual elements - works for all types + for (int i = 0; i < x.size(); ++i) { + auto diff = x[i] - y[i]; + auto weight = AccT{1} + AccT{i}; // Custom per-dimension weight + acc += weight * diff * diff; + } +}) + +// ============================================================ +// Power user: raw access with intrinsics +// ============================================================ +// +// For maximum performance, you can still use raw() and intrinsics. +// But now you don't HAVE to! + +CUVS_METRIC(power_user_l2, { + if constexpr (decltype(x)::is_packed()) { + // SIMD path - use intrinsics directly + auto diff = __vabsdiffs4(x.raw(), y.raw()); + acc = __dp4a(diff, diff, acc); + } else { + // Scalar path + auto diff = x.raw() - y.raw(); + acc += diff * diff; + } +}) + +int main() +{ + std::cout << "=== cuVS UDF int8/uint8 Metrics - Now Easy! ===\n\n"; + + std::cout << "OLD WAY (manual intrinsics):\n"; + std::cout << " if constexpr (std::is_same_v && Veclen > 1) {\n"; + std::cout << " auto diff = __vabsdiffs4(x, y); // Must know this!\n"; + std::cout << " acc = raft::dp4a(diff, diff, acc); // And this!\n"; + std::cout << " } else { ... 
}\n\n"; + + std::cout << "NEW WAY (with point wrapper):\n"; + std::cout << " acc += cuvs::udf::squared_diff(x, y); // Just works!\n\n"; + + std::cout << "Available helpers (auto-deduce Veclen):\n"; + std::cout << " squared_diff(x, y) - (x-y)² optimized for all types\n"; + std::cout << " abs_diff(x, y) - |x-y| optimized for all types\n"; + std::cout << " dot_product(x, y) - x·y optimized for all types\n"; + std::cout << " product(x, y) - element-wise product\n"; + std::cout << " sum(x, y) - element-wise sum\n"; + std::cout << " max_elem(x, y) - maximum element\n\n"; + + std::cout << "For custom logic, use element access:\n"; + std::cout << " for (int i = 0; i < x.size(); ++i) {\n"; + std::cout << " acc += custom_weight[i] * (x[i] - y[i]);\n"; + std::cout << " }\n\n"; + + std::cout << "Type info available at compile time:\n"; + std::cout << " x.size() - 4 for packed int8/uint8, 1 for float\n"; + std::cout << " x.is_packed() - true for int8/uint8 with Veclen > 1\n"; + std::cout << " x.raw() - raw storage for power users\n"; + + return 0; +} diff --git a/examples/cpp/src/udf_simple_metric.cu b/examples/cpp/src/udf_simple_metric.cu new file mode 100644 index 0000000000..58aba84e53 --- /dev/null +++ b/examples/cpp/src/udf_simple_metric.cu @@ -0,0 +1,98 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file udf_simple_metric.cu + * @brief Simple example: Custom "Over 9000" L1 distance metric + * + * Shows the minimal code needed to define and use a custom metric. + */ + +#include +#include +#include + +#include + +// ============================================================ +// Define your metric - ONE LINE (plus the body)! +// ============================================================ +// +// The CUVS_METRIC macro: +// 1. Creates the actual struct (compiled, validated with `override`) +// 2. 
Generates awesome_over_9000_udf() function returning source string +// +// Available variables in body: +// acc - accumulated distance (AccT&, modify in-place) +// x, y - vector elements (point) +// +// x and y provide: +// x.raw() - raw packed storage (power users) +// x[i] - element access (unpacked) +// x.size() - number of elements +// +// Helpers (Veclen deduced automatically!): +// cuvs::udf::squared_diff(x, y) - optimal for all types +// cuvs::udf::abs_diff(x, y) - optimal for all types +// cuvs::udf::dot_product(x, y) - optimal for all types + +CUVS_METRIC(awesome_over_9000, { + // IT'S OVER 9000!!!! + // Works for ALL types - float, half, int8, uint8! + auto diff = cuvs::udf::abs_diff(x, y); + acc += diff * AccT{9001}; +}) + +// That's it! The macro handles: +// - Struct definition with proper inheritance +// - operator() signature with `override` for validation +// - Source string generation for JIT +// - point wrapping for clean API + +int main() +{ + std::cout << "=== cuVS UDF Simple Example ===\n\n"; + + // ============================================================ + // Use in search + // ============================================================ + + // raft::device_resources res; + // auto index = cuvs::neighbors::ivf_flat::deserialize(res, "index.bin"); + + // cuvs::neighbors::ivf_flat::search_params params; + // params.n_probes = 50; + + // Use the auto-generated _udf() function! 
+ // params.udf.metric = awesome_over_9000_udf(); + + // cuvs::neighbors::ivf_flat::search(res, params, index, queries, neighbors, distances); + + // ============================================================ + // What happens under the hood + // ============================================================ + + std::cout << "User writes:\n"; + std::cout << " CUVS_METRIC(awesome_over_9000, {\n"; + std::cout << " auto diff = cuvs::udf::abs_diff(x, y);\n"; + std::cout << " acc += diff * AccT{9001};\n"; + std::cout << " })\n\n"; + + std::cout << "x and y are point which provides:\n"; + std::cout << " - x.raw() : packed storage for intrinsics\n"; + std::cout << " - x[i] : unpacked element access\n"; + std::cout << " - x.size() : number of elements (4 for packed int8, 1 for float)\n"; + std::cout << " - x.is_packed(): whether data is packed\n\n"; + + std::cout << "Helper functions deduce Veclen automatically:\n"; + std::cout << " cuvs::udf::squared_diff(x, y) // No template args!\n"; + std::cout << " cuvs::udf::abs_diff(x, y)\n"; + std::cout << " cuvs::udf::dot_product(x, y)\n\n"; + + std::cout << "At runtime, cuVS wraps raw values in point\n"; + std::cout << "and calls your metric with the wrapped arguments.\n"; + + return 0; +} diff --git a/examples/cpp/src/udf_weighted_metric.cu b/examples/cpp/src/udf_weighted_metric.cu new file mode 100644 index 0000000000..ea1e13c89b --- /dev/null +++ b/examples/cpp/src/udf_weighted_metric.cu @@ -0,0 +1,133 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file udf_weighted_metric.cu + * @brief Example: Custom metric with helper headers + * + * This example shows how to use custom headers with your UDF metric. + * Headers are passed to NVRTC's virtual filesystem. 
+ */ + +#include +#include +#include + +#include + +int main() +{ + raft::device_resources res; + + // ============================================================ + // Define helper header + // ============================================================ + // + // If your metric needs helper functions or constants, + // you can provide them as headers. + + std::string math_utils_header = R"( + #pragma once + + namespace my_utils { + + template + __device__ __forceinline__ T safe_abs(T x) { + return (x < T{0}) ? -x : x; + } + + template + __device__ __forceinline__ T clamp(T x, T lo, T hi) { + return (x < lo) ? lo : ((x > hi) ? hi : x); + } + + // Custom weight function - could be learned from data! + template + __device__ __forceinline__ T importance_weight() { + return T{2.5}; + } + + } // namespace my_utils + )"; + + // ============================================================ + // Define metric using the helper header and point wrapper + // ============================================================ + + cuvs::udf::metric_source weighted_metric = { + .source = R"( + #include "math_utils.cuh" + #include + + template + struct weighted_euclidean + : cuvs::udf::metric_interface + { + using point_type = cuvs::udf::point; + + __device__ void operator()(AccT& acc, point_type x, point_type y) override { + // Use helper for optimal squared diff + auto sq_diff = cuvs::udf::squared_diff(x, y); + + // Apply custom weight + auto weight = my_utils::importance_weight(); + acc += weight * sq_diff; + } + }; + )", + .struct_name = "weighted_euclidean", + + // Provide the header content + .headers = {{"math_utils.cuh", math_utils_header}}}; + + // ============================================================ + // Alternative: Per-dimension weights using element access + // ============================================================ + + cuvs::udf::metric_source per_dim_weighted = { + .source = R"( + #include + + template + struct per_dim_weighted_l2 + : 
cuvs::udf::metric_interface + { + using point_type = cuvs::udf::point; + + __device__ void operator()(AccT& acc, point_type x, point_type y) override { + // Per-dimension weights using element access + for (int i = 0; i < x.size(); ++i) { + auto diff = x[i] - y[i]; + auto weight = AccT{1} + AccT{i} * AccT{0.1}; // Increasing weights + acc += weight * diff * diff; + } + } + }; + )", + .struct_name = "per_dim_weighted_l2", + .headers = {}}; + + // ============================================================ + // Search configuration + // ============================================================ + + cuvs::neighbors::ivf_flat::search_params params; + params.n_probes = 50; + // params.udf.metric = weighted_metric; + + std::cout << "Weighted Euclidean distance metric example!\n"; + std::cout << "\n"; + std::cout << "This demonstrates:\n"; + std::cout << " 1. Using custom helper headers with UDFs\n"; + std::cout << " 2. Using cuvs::udf::squared_diff(x, y) helper\n"; + std::cout << " 3. Per-dimension weights using x[i], y[i] element access\n"; + std::cout << "\n"; + std::cout << "The point wrapper provides:\n"; + std::cout << " - squared_diff(x, y) : optimal for all types\n"; + std::cout << " - x[i], y[i] : element access for custom logic\n"; + std::cout << " - x.raw() : raw storage for power users\n"; + + return 0; +} From 447532eb2e4bc99e4b8dee2df36140a3e6f2792b Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 30 Jan 2026 00:19:51 +0000 Subject: [PATCH 073/158] stitch together --- cpp/CMakeLists.txt | 1 + .../cuvs/detail/jit_lto/FragmentDatabase.h | 10 + .../cuvs/detail/jit_lto/FragmentEntry.h | 9 + .../jit_lto/NVRTCLTOFragmentCompiler.hpp | 19 + cpp/include/cuvs/neighbors/ivf_flat.hpp | 351 +++++++++++++++++- .../cuvs/udf/README_UDF_API_PROPOSAL.md | 226 ----------- cpp/include/cuvs/udf/compiler.hpp | 101 ----- cpp/include/cuvs/udf/metric_interface.cuh | 61 --- cpp/include/cuvs/udf/metric_macro.hpp | 73 ---- cpp/include/cuvs/udf/metric_source.hpp | 72 ---- 
cpp/include/cuvs/udf/packed_helpers.cuh | 80 ---- cpp/include/cuvs/udf/point.cuh | 220 ----------- cpp/src/detail/jit_lto/FragmentDatabase.cu | 10 + cpp/src/detail/jit_lto/FragmentEntry.cu | 16 + .../jit_lto/NVRTCLTOFragmentCompiler.cu | 74 ++++ .../ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp | 17 + cpp/src/udf/compiler.cpp | 242 ------------ 17 files changed, 506 insertions(+), 1076 deletions(-) create mode 100644 cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp delete mode 100644 cpp/include/cuvs/udf/README_UDF_API_PROPOSAL.md delete mode 100644 cpp/include/cuvs/udf/compiler.hpp delete mode 100644 cpp/include/cuvs/udf/metric_interface.cuh delete mode 100644 cpp/include/cuvs/udf/metric_macro.hpp delete mode 100644 cpp/include/cuvs/udf/metric_source.hpp delete mode 100644 cpp/include/cuvs/udf/packed_helpers.cuh delete mode 100644 cpp/include/cuvs/udf/point.cuh create mode 100644 cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu create mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp delete mode 100644 cpp/src/udf/compiler.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e9218098f0..077e871860 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -363,6 +363,7 @@ if(NOT BUILD_CPU_ONLY) src/detail/jit_lto/FragmentDatabase.cu src/detail/jit_lto/FragmentEntry.cu src/detail/jit_lto/nvjitlink_checker.cpp + src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu ) endif() diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h index 890796ba44..0e9c63da54 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h @@ -34,6 +34,12 @@ class FragmentDatabase { unsigned char const* blob, std::size_t size); + friend void registerNVRTCFragment(std::string const& algo, + std::string const& params, + std::string const& mname, + std::size_t size, + std::unique_ptr&& program); + std::unordered_map> cache; }; @@ -43,3 
+49,7 @@ void registerFatbinFragment(std::string const& algo, std::string const& params, unsigned char const* blob, std::size_t size); + +void registerNVRTCFragment(std::string const& key, + std::size_t size, + std::unique_ptr&& program); diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h index a376068425..4d428e3262 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h @@ -30,3 +30,12 @@ struct FatbinFragmentEntry final : FragmentEntry { std::size_t data_size = 0; unsigned char const* data_view = nullptr; }; + +struct NVRTCFragmentEntry final : FragmentEntry { + NVRTCFragmentEntry(std::string const& key, std::unique_ptr&& program std::size_t size); + + virtual bool add_to(nvJitLinkHandle& handle) const; + + std::size_t data_size = 0; + std::unique_ptr program{}; +}; diff --git a/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp b/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp new file mode 100644 index 0000000000..a3a0988949 --- /dev/null +++ b/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp @@ -0,0 +1,19 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include + +struct NRTCLTOFragmentCompiler { + NRTCLTOFragmentCompiler(); + + void compile(std::string const& key, std::string const& code) const; + + std::vector standard_compile_opts; +}; diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index 9928a0e3ca..77a2381f9e 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -3035,4 +3035,353 @@ void recompute_internal_state(const raft::resources& res, index 1: wraps packed bytes in a 32-bit word + * + * @tparam T Data type (float, __half, int8_t, uint8_t) + * @tparam AccT Storage/accumulator type (float, __half, int32_t, uint32_t) + * @tparam Veclen Vector length (1, 2, 4, 8, 16) + * + * Usage: + * // Helpers deduce Veclen automatically: + * acc += cuvs::udf::squared_diff(x, y); // No template args! + * + * // Array access for custom logic (slower but flexible): + * for (int i = 0; i < x.size(); ++i) { + * acc += x[i] * y[i]; + * } + * + * // Query packing: + * if constexpr (decltype(x)::is_packed()) { ... } + */ +const std::string_view point_code = R"( + template + struct point { + using element_type = T; + using storage_type = AccT; + static constexpr int veclen = Veclen; + + storage_type data_; + + // ============================================================ + // Constructors + // ============================================================ + + __device__ __host__ point() = default; + __device__ __host__ explicit point(storage_type d) : data_(d) {} + + // ============================================================ + // Raw access (for power users who need intrinsics) + // ============================================================ + + __device__ __forceinline__ storage_type raw() const { return data_; } + __device__ __forceinline__ storage_type& raw() { return data_; } + + // ============================================================ + // Compile-time queries + // ============================================================ + + __device__ __host__ static constexpr int size() + { + // For packed int8/uint8: 4 elements per storage word + if constexpr ((std::is_same_v || std::is_same_v) && Veclen > 1) { + return 4; + } else { + return 1; + } + } + + __device__ __host__ static constexpr bool is_packed() + { + return (std::is_same_v || std::is_same_v) && Veclen > 1; + } + + // 
============================================================ + // Element access (unpacks for int8/uint8) + // ============================================================ + + __device__ __forceinline__ T operator[](int i) const + { + if constexpr (std::is_same_v && Veclen > 1) { + // Extract signed byte i from packed int32_t + return static_cast((data_ >> (i * 8)) & 0xFF); + } else if constexpr (std::is_same_v && Veclen > 1) { + // Extract unsigned byte i from packed uint32_t + return static_cast((data_ >> (i * 8)) & 0xFF); + } else { + // Scalar types: only one element + (void)i; // Unused + return static_cast(data_); + } + } + }; + )"; +// ============================================================ +// Helper Operations - Deduce Veclen from point type! +// ============================================================ + +/** + * @brief Squared difference: (x - y)² + * + * Optimized for packed int8/uint8, falls back to scalar for float/half. + */ +const std::string_view squared_diff_code = R"( + template + __device__ __forceinline__ AccT squared_diff(point x, point y) + { + if constexpr (std::is_same_v && V > 1) { + // SIMD: 4 packed unsigned bytes + auto diff = __vabsdiffu4(x.raw(), y.raw()); + return __dp4a(diff, diff, AccT{0}); + } else if constexpr (std::is_same_v && V > 1) { + // SIMD: 4 packed signed bytes + auto diff = __vabsdiffs4(x.raw(), y.raw()); + return __dp4a(diff, diff, static_cast(0)); + } else { + // Scalar: float, half, or byte with Veclen==1 + auto diff = x.raw() - y.raw(); + return diff * diff; + } + } + )"; +/** + * @brief Absolute difference: |x - y| + * + * For packed types, returns sum of absolute differences. 
+ */ +const std::string_view abs_diff_code = R"( + template + __device__ __forceinline__ AccT abs_diff(point x, point y) + { + if constexpr (std::is_same_v && V > 1) { + // SIMD: sum of 4 unsigned absolute differences + auto diff = __vabsdiffu4(x.raw(), y.raw()); + // Sum the 4 bytes + return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + + ((diff >> 24) & 0xFF); + } else if constexpr (std::is_same_v && V > 1) { + // SIMD: sum of 4 signed absolute differences + auto diff = __vabsdiffs4(x.raw(), y.raw()); + return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + + ((diff >> 24) & 0xFF); + } else { + // Scalar + auto a = x.raw(); + auto b = y.raw(); + return (a > b) ? (a - b) : (b - a); + } + } + )"; +/** + * @brief Dot product: x · y + * + * For packed types, computes sum of element-wise products. + */ +const std::string_view dot_product_code = R"( + template + __device__ __forceinline__ AccT dot_product(point x, point y) + { + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + // SIMD: dp4a computes dot product of 4 packed bytes + return __dp4a(x.raw(), y.raw(), AccT{0}); + } else { + // Scalar + return x.raw() * y.raw(); + } + } + )"; +/** + * @brief Element-wise product: x * y + * + * For packed types, returns sum of element-wise products (same as dot_product). 
+ */ +const std::string_view product_code = R"( + template + __device__ __forceinline__ AccT product(point x, point y) + { + return dot_product(x, y); + } + )"; +/** + * @brief Element-wise sum: x + y + */ +const std::string_view sum_code = R"( + template + __device__ __forceinline__ AccT sum(point x, point y) + { + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + // Sum all unpacked elements + AccT result = 0; + for (int i = 0; i < x.size(); ++i) { + result += static_cast(x[i]) + static_cast(y[i]); + } + return result; + } else { + return x.raw() + y.raw(); + } + } + )"; +/** + * @brief Maximum element: max(x, y) + * + * For packed types, returns max across all element pairs. + */ +const std::string_view max_elem_code = R"( + template + __device__ __forceinline__ AccT max_elem(point x, point y) + { + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + AccT result = 0; + for (int i = 0; i < x.size(); ++i) { + auto xi = static_cast(x[i]); + auto yi = static_cast(y[i]); + auto val = (xi > yi) ? xi : yi; + if (val > result) result = val; + } + return result; + } else { + auto a = x.raw(); + auto b = y.raw(); + return (a > b) ? a : b; + } + } + )"; +/** + * @brief Base interface for custom distance metrics. + * + * Inherit from this interface to get compile-time enforcement of the + * correct operator() signature via the `override` keyword. 
+ * + * If you forget to implement operator() or use the wrong signature, + * you'll get a clear compile error: "does not override any member function" + * + * @tparam T Data type (float, __half, int8_t, uint8_t) + * @tparam AccT Accumulator type (float, __half, int32_t, uint32_t) + * @tparam Veclen Vector length (handled by cuVS internally) + * + * @note x and y are point which provides: + * - .raw() : packed storage for power users + * - operator[] : unpacked element access + * - ::veclen : compile-time Veclen + * - ::is_packed() : whether data is packed + */ +const std::string_view metric_interface_code = R"( + template + struct metric_interface { + using point_type = point; + + /** + * @brief Compute distance contribution for one element pair. + * + * @param[in,out] acc Accumulated distance value + * @param[in] x Query vector element (point wrapper) + * @param[in] y Database vector element (point wrapper) + * + * Example: + * // Simple - use helpers (recommended): + * acc += squared_diff(x, y); + * + * // Array access for custom logic: + * for (int i = 0; i < x.size(); ++i) { + * acc += x[i] * y[i]; + * } + * + * // Power user - raw access: + * if constexpr (point_type::is_packed()) { + * acc = __dp4a(x.raw(), y.raw(), acc); + * } + */ + virtual __device__ void operator()(AccT& acc, point_type x, point_type y) = 0; + + virtual __device__ ~metric_interface() = default; + }; + )"; + +/** + * @brief Define a custom distance metric with compile-time validation. + * + * This macro creates: + * 1. A struct that inherits from metric_interface (compile-time validation) + * 2. 
A function NAME_udf() that returns a metric_source for JIT compilation + * + * @param NAME The name of your metric (becomes struct name and function prefix) + * @param BODY The body of operator()(AccT& acc, point_type x, point_type y) + * + * Available in BODY: + * acc - Accumulated distance (AccT&, modify in-place) + * x, y - Vector elements (point) + * T - Data type (float, __half, int8_t, uint8_t) + * AccT - Accumulator type + * Veclen - Vector length (compile-time constant) + * + * x and y provide: + * x.raw() - Raw packed storage (for power users) + * x[i] - Unpacked element access + * x.size() - Number of elements (4 for packed int8, 1 for float) + * x.is_packed() - Whether data is packed (constexpr) + * + * Helper functions (Veclen deduced automatically!): + * cuvs::udf::squared_diff(x, y) - (x-y)² optimized for all types + * cuvs::udf::abs_diff(x, y) - |x-y| optimized for all types + * cuvs::udf::dot_product(x, y) - x·y optimized for all types + * cuvs::udf::product(x, y) - element-wise product + * + * Example: + * CUVS_METRIC(my_l2, { + * acc += cuvs::udf::squared_diff(x, y); // Just works for all types! + * }) + * + * CUVS_METRIC(my_chebyshev, { + * for (int i = 0; i < x.size(); ++i) { + * auto diff = (x[i] > y[i]) ? 
(x[i] - y[i]) : (y[i] - x[i]); + * if (diff > acc) acc = diff; + * } + * }) + */ +#define CUVS_METRIC(NAME, BODY) \ + template \ + struct NAME : cuvs::udf::metric_interface { \ + using point_type = cuvs::udf::point; \ + __device__ void operator()(AccT& acc, point_type x, point_type y) override { BODY } \ + }; \ + \ + inline std::string NAME##_udf() \ + { \ + using namespace cuvs::neighbors::ivf_flat::udf; \ + std::string result; \ + result += point_code; \ + result += squared_diff_code; \ + result += abs_diff_code; \ + result += dot_product_code; \ + result += product_code; \ + result += sum_code; \ + result += max_elem_code; \ + result += metric_interface_code; \ + result += R"( \ +template \ +struct )" #NAME R"( : metric_interface { \ + using point_type = point; \ + __device__ void operator()(AccT& acc, point_type x, point_type y) override \ +)" #BODY R"( \ +}; \ + \ +template \ +__device__ void compute_dist(AccT& acc, AccT x, AccT y) \ +{ \ + )" #NAME R"( metric; \ + metric(acc, point(x), point(y)); \ +} \ +)"; \ + return result; \ + } + +void compile_metric(std::string const& code); +} // namespace udf + } // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/include/cuvs/udf/README_UDF_API_PROPOSAL.md b/cpp/include/cuvs/udf/README_UDF_API_PROPOSAL.md deleted file mode 100644 index 74d9b65385..0000000000 --- a/cpp/include/cuvs/udf/README_UDF_API_PROPOSAL.md +++ /dev/null @@ -1,226 +0,0 @@ -# cuVS UDF (User-Defined Function) API Proposal - -This folder contains a proposed UDF API for cuVS custom distance metrics. - -## Design Overview - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ User writes (via macro): │ -│ │ -│ CUVS_METRIC(my_l2, { │ -│ acc += cuvs::udf::squared_diff(x, y); // Just works! 
│ -│ }) │ -│ │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ x and y are point which provides: │ -│ │ -│ x.raw() - Raw packed storage (for power users) │ -│ x[i] - Unpacked element access │ -│ x.size() - Number of elements (4 for packed int8, 1 for float) │ -│ x.is_packed() - Whether data is packed (constexpr) │ -│ │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ Helper functions deduce Veclen automatically: │ -│ │ -│ cuvs::udf::squared_diff(x, y) - (x-y)² optimal for ALL types │ -│ cuvs::udf::abs_diff(x, y) - |x-y| optimal for ALL types │ -│ cuvs::udf::dot_product(x, y) - x·y optimal for ALL types │ -│ │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ At runtime, cuVS wraps raw values and calls your metric: │ -│ │ -│ point_t x{x_raw}, y{y_raw}; │ -│ my_l2{}(acc, x, y); │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -## Files - -``` -cpp/include/cuvs/udf/ -├── README_UDF_API_PROPOSAL.md # This file -├── point.cuh # point wrapper + helpers -├── metric_interface.cuh # Base interface (compile-time safety) -├── metric_macro.hpp # CUVS_METRIC macro -├── metric_source.hpp # metric_source struct -├── compiler.hpp # Internal JIT compiler interface -└── packed_helpers.cuh # [DEPRECATED] Legacy helpers - -cpp/src/udf/ -└── compiler.cpp # JIT compiler implementation - -examples/cpp/src/ -├── udf_simple_metric.cu # Basic example -├── udf_chebyshev_metric.cu # L∞ distance -├── udf_int8_metric.cu # int8/uint8 - now easy! -└── udf_weighted_metric.cu # Custom headers -``` - -## Quick Start - -### 1. Define Your Metric - The Easy Way - -```cpp -#include - -// L2 distance - works for float, half, int8, uint8! -CUVS_METRIC(my_l2, { - acc += cuvs::udf::squared_diff(x, y); -}) - -// L1 distance - works for all types! -CUVS_METRIC(my_l1, { - acc += cuvs::udf::abs_diff(x, y); -}) - -// Inner product - works for all types! 
-CUVS_METRIC(my_dot, { - acc += cuvs::udf::dot_product(x, y); -}) -``` - -### 2. Custom Logic with Element Access - -```cpp -// Chebyshev (L∞) distance - max absolute difference -CUVS_METRIC(chebyshev, { - for (int i = 0; i < x.size(); ++i) { - auto diff = (x[i] > y[i]) ? (x[i] - y[i]) : (y[i] - x[i]); - if (diff > acc) acc = static_cast(diff); - } -}) - -// Per-dimension weighted L2 -CUVS_METRIC(weighted_l2, { - for (int i = 0; i < x.size(); ++i) { - auto diff = x[i] - y[i]; - auto weight = AccT{1} + AccT{i} * AccT{0.1}; // Custom weights - acc += weight * diff * diff; - } -}) -``` - -### 3. Use in Search - -```cpp -#include - -int main() { - auto index = cuvs::neighbors::ivf_flat::deserialize(res, "index.bin"); - - cuvs::neighbors::ivf_flat::search_params params; - params.udf.metric = my_l2_udf(); // Auto-generated function! - - cuvs::neighbors::ivf_flat::search(res, params, index, queries, neighbors, distances); -} -``` - -## The `point` Wrapper - -The key innovation is wrapping raw values in `point`: - -```cpp -template -struct point { - storage_type data_; - - // Raw access for power users - __device__ storage_type raw() const; - - // Element access - handles unpacking automatically - __device__ T operator[](int i) const; - - // Compile-time queries - static constexpr int size(); // 4 for packed int8, 1 for float - static constexpr bool is_packed(); // true for int8/uint8 with Veclen > 1 -}; -``` - -### Benefits - -1. **Helpers deduce Veclen automatically** - no template args needed! -2. **Element access `x[i]`** - unpacks automatically for int8/uint8 -3. **Type queries** - `is_packed()`, `size()` for conditional logic -4. **Raw access** - `x.raw()` for power users who need intrinsics - -## Helper Functions - -All helpers deduce `Veclen` from the `point` type - no manual template args! 
- -| Helper | Description | int8/uint8 Implementation | -|--------|-------------|---------------------------| -| `squared_diff(x, y)` | (x-y)² | `__vabsdiffs4` + `__dp4a` | -| `abs_diff(x, y)` | \|x-y\| | `__vabsdiffs4` + byte sum | -| `dot_product(x, y)` | x·y | `__dp4a` | -| `product(x, y)` | element-wise × | `__dp4a` | -| `sum(x, y)` | element-wise + | unpacked loop | -| `max_elem(x, y)` | max element | unpacked loop | - -## Supported Types - -| Data Type | Accumulator | `x.size()` | Complexity | -|-----------|-------------|------------|------------| -| `float` | `float` | 1 | ⭐ Easy | -| `__half` | `__half` | 1 | ⭐ Easy | -| `int8_t` | `int32_t` | 4 (packed) | ⭐ Easy with helpers! | -| `uint8_t` | `uint32_t` | 4 (packed) | ⭐ Easy with helpers! | - -### int8/uint8 - Now Easy! - -**Before** (manual intrinsics): -```cpp -CUVS_METRIC(old_way, { - if constexpr (std::is_same_v && Veclen > 1) { - auto diff = __vabsdiffs4(x, y); // Must know this! - acc = raft::dp4a(diff, diff, acc); // And this! - } else { - auto diff = x - y; - acc += diff * diff; - } -}) -``` - -**After** (with point wrapper): -```cpp -CUVS_METRIC(new_way, { - acc += cuvs::udf::squared_diff(x, y); // Just works! 
-}) -``` - -## Power User Mode - -For maximum control, you can still access raw storage and use intrinsics: - -```cpp -CUVS_METRIC(power_user, { - if constexpr (decltype(x)::is_packed()) { - // Use SIMD intrinsics directly - acc = __dp4a(x.raw(), y.raw(), acc); - } else { - acc += x.raw() * y.raw(); - } -}) -``` - -## Error Handling - -```cpp -try { - search(res, params, index, queries, neighbors, distances); -} catch (const cuvs::udf::compilation_error& e) { - std::cerr << "UDF compilation failed:\n" << e.what() << std::endl; -} -``` - -## Summary - -| Feature | Benefit | -|---------|---------| -| **point wrapper** | Clean API, no raw AccT confusion | -| **Helper functions** | No intrinsics knowledge needed | -| **Auto Veclen deduction** | No template args in helpers | -| **Element access `x[i]`** | Custom logic without intrinsics | -| **Compile-time queries** | `is_packed()`, `size()` for branching | -| **Raw access `x.raw()`** | Power users can still use intrinsics | diff --git a/cpp/include/cuvs/udf/compiler.hpp b/cpp/include/cuvs/udf/compiler.hpp deleted file mode 100644 index 7f979772b1..0000000000 --- a/cpp/include/cuvs/udf/compiler.hpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include -#include -#include - -#include "metric_source.hpp" - -namespace cuvs::udf::detail { - -/** - * @brief Compiled UDF fragment (LTO-IR). - */ -struct compiled_fragment { - std::vector lto_ir; - size_t size() const { return lto_ir.size(); } - const char* data() const { return lto_ir.data(); } -}; - -/** - * @brief Cache key for compiled UDFs. 
- */ -struct cache_key { - std::string source_hash; - std::string struct_name; - int veclen; - std::string data_type; - std::string acc_type; - int compute_capability; - - bool operator==(const cache_key& other) const - { - return source_hash == other.source_hash && struct_name == other.struct_name && - veclen == other.veclen && data_type == other.data_type && acc_type == other.acc_type && - compute_capability == other.compute_capability; - } -}; - -struct cache_key_hash { - size_t operator()(const cache_key& k) const; -}; - -/** - * @brief Thread-safe cache for compiled UDF fragments. - */ -class udf_cache { - public: - static udf_cache& instance(); - - std::shared_ptr get(const cache_key& key); - void put(const cache_key& key, std::shared_ptr fragment); - void clear(); - - private: - udf_cache() = default; - std::unordered_map, cache_key_hash> cache_; - std::mutex mutex_; -}; - -/** - * @brief Build the full source code for JIT compilation. - * - * Takes the user's struct source and appends: - * 1. Standard includes - * 2. The compute_dist wrapper function - * 3. The explicit instantiation for the given Veclen/T/AccT - * - * @param udf User's metric source (struct only) - * @param veclen Vector length from index - * @param data_type Data type string (e.g., "float") - * @param acc_type Accumulator type string (e.g., "float") - * @return Complete source ready for NVRTC - */ -std::string build_full_source(const metric_source& udf, - int veclen, - const std::string& data_type, - const std::string& acc_type); - -/** - * @brief Compile a UDF metric source to LTO-IR. 
- * - * @param udf The user's metric source - * @param veclen Vector length from index - * @param data_type Data type string - * @param acc_type Accumulator type string - * @return Compiled fragment (LTO-IR) - * @throws compilation_error if NVRTC compilation fails - */ -std::shared_ptr compile_metric(const metric_source& udf, - int veclen, - const std::string& data_type, - const std::string& acc_type); - -} // namespace cuvs::udf::detail diff --git a/cpp/include/cuvs/udf/metric_interface.cuh b/cpp/include/cuvs/udf/metric_interface.cuh deleted file mode 100644 index 4c36116e5f..0000000000 --- a/cpp/include/cuvs/udf/metric_interface.cuh +++ /dev/null @@ -1,61 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "point.cuh" - -namespace cuvs::udf { - -/** - * @brief Base interface for custom distance metrics. - * - * Inherit from this interface to get compile-time enforcement of the - * correct operator() signature via the `override` keyword. - * - * If you forget to implement operator() or use the wrong signature, - * you'll get a clear compile error: "does not override any member function" - * - * @tparam T Data type (float, __half, int8_t, uint8_t) - * @tparam AccT Accumulator type (float, __half, int32_t, uint32_t) - * @tparam Veclen Vector length (handled by cuVS internally) - * - * @note x and y are point which provides: - * - .raw() : packed storage for power users - * - operator[] : unpacked element access - * - ::veclen : compile-time Veclen - * - ::is_packed() : whether data is packed - */ -template -struct metric_interface { - using point_type = point; - - /** - * @brief Compute distance contribution for one element pair. 
- * - * @param[in,out] acc Accumulated distance value - * @param[in] x Query vector element (point wrapper) - * @param[in] y Database vector element (point wrapper) - * - * Example: - * // Simple - use helpers (recommended): - * acc += squared_diff(x, y); - * - * // Array access for custom logic: - * for (int i = 0; i < x.size(); ++i) { - * acc += x[i] * y[i]; - * } - * - * // Power user - raw access: - * if constexpr (point_type::is_packed()) { - * acc = __dp4a(x.raw(), y.raw(), acc); - * } - */ - virtual __device__ void operator()(AccT& acc, point_type x, point_type y) = 0; - - virtual __device__ ~metric_interface() = default; -}; - -} // namespace cuvs::udf diff --git a/cpp/include/cuvs/udf/metric_macro.hpp b/cpp/include/cuvs/udf/metric_macro.hpp deleted file mode 100644 index 69ad8157ed..0000000000 --- a/cpp/include/cuvs/udf/metric_macro.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "metric_interface.cuh" -#include "metric_source.hpp" -#include "point.cuh" - -/** - * @brief Define a custom distance metric with compile-time validation. - * - * This macro creates: - * 1. A struct that inherits from metric_interface (compile-time validation) - * 2. 
A function NAME_udf() that returns a metric_source for JIT compilation - * - * @param NAME The name of your metric (becomes struct name and function prefix) - * @param BODY The body of operator()(AccT& acc, point_type x, point_type y) - * - * Available in BODY: - * acc - Accumulated distance (AccT&, modify in-place) - * x, y - Vector elements (point) - * T - Data type (float, __half, int8_t, uint8_t) - * AccT - Accumulator type - * Veclen - Vector length (compile-time constant) - * - * x and y provide: - * x.raw() - Raw packed storage (for power users) - * x[i] - Unpacked element access - * x.size() - Number of elements (4 for packed int8, 1 for float) - * x.is_packed() - Whether data is packed (constexpr) - * - * Helper functions (Veclen deduced automatically!): - * cuvs::udf::squared_diff(x, y) - (x-y)² optimized for all types - * cuvs::udf::abs_diff(x, y) - |x-y| optimized for all types - * cuvs::udf::dot_product(x, y) - x·y optimized for all types - * cuvs::udf::product(x, y) - element-wise product - * - * Example: - * CUVS_METRIC(my_l2, { - * acc += cuvs::udf::squared_diff(x, y); // Just works for all types! - * }) - * - * CUVS_METRIC(my_chebyshev, { - * for (int i = 0; i < x.size(); ++i) { - * auto diff = (x[i] > y[i]) ? 
(x[i] - y[i]) : (y[i] - x[i]); - * if (diff > acc) acc = diff; - * } - * }) - */ -#define CUVS_METRIC(NAME, BODY) \ - template \ - struct NAME : cuvs::udf::metric_interface { \ - using point_type = cuvs::udf::point; \ - __device__ void operator()(AccT& acc, point_type x, point_type y) override { BODY } \ - }; \ - \ - inline cuvs::udf::metric_source NAME##_udf() \ - { \ - return cuvs::udf::metric_source{ \ - .source = R"( \ -template \ -struct )" #NAME R"( : cuvs::udf::metric_interface { \ - using point_type = cuvs::udf::point; \ - __device__ void operator()(AccT& acc, point_type x, point_type y) override \ -)" #BODY R"( \ -}; \ -)", \ - .struct_name = #NAME, \ - .headers = {}}; \ - } diff --git a/cpp/include/cuvs/udf/metric_source.hpp b/cpp/include/cuvs/udf/metric_source.hpp deleted file mode 100644 index 4344f8e923..0000000000 --- a/cpp/include/cuvs/udf/metric_source.hpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include -#include - -namespace cuvs::udf { - -/** - * @brief Source definition for a custom metric. - * - * Contains the struct source code and metadata needed for JIT compilation. - * Typically created via the CUVS_METRIC macro (see metric_macro.hpp). - */ -struct metric_source { - /** - * @brief CUDA source code containing the metric struct. - * - * Should define a template struct: - * template - * struct your_name { - * __device__ void operator()(AccT& acc, AccT x, AccT y) { ... } - * }; - * - * Note: Do NOT include the compute_dist wrapper or explicit instantiation. - * cuVS appends those automatically based on index properties. - */ - std::string source; - - /** - * @brief Name of the metric struct (without template parameters). - * - * cuVS uses this to generate: - * your_name{}(acc, x, y); - */ - std::string struct_name; - - /** - * @brief Optional headers the metric depends on. 
- * - * Map of header name -> header content. - * Passed to NVRTC's virtual filesystem. - */ - std::unordered_map headers; -}; - -/** - * @brief UDF configuration for search parameters. - */ -struct udf_config { - std::optional metric; - - // Future extensions: - // std::optional sample_filter; - // std::optional post_process; -}; - -/** - * @brief Exception thrown when UDF JIT compilation fails. - */ -class compilation_error : public std::runtime_error { - public: - explicit compilation_error(const std::string& msg) : std::runtime_error(msg) {} -}; - -} // namespace cuvs::udf diff --git a/cpp/include/cuvs/udf/packed_helpers.cuh b/cpp/include/cuvs/udf/packed_helpers.cuh deleted file mode 100644 index 1891343239..0000000000 --- a/cpp/include/cuvs/udf/packed_helpers.cuh +++ /dev/null @@ -1,80 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -// ============================================================================= -// DEPRECATED: Use point.cuh helpers instead! -// -// Old API: -// acc += cuvs::udf::packed_sq_diff_u8(x, y); -// -// New API (recommended): -// acc += cuvs::udf::squared_diff(x, y); // Works for ALL types! -// -// The new helpers take point and deduce types automatically. -// ============================================================================= - -#include "point.cuh" - -namespace cuvs::udf { - -// Legacy helpers for raw packed values (use point-based helpers instead!) - -/** - * @brief [DEPRECATED] Use squared_diff(x, y) with point wrapper instead. - */ -__device__ __forceinline__ uint32_t packed_sq_diff_u8(uint32_t x, uint32_t y) -{ - auto diff = __vabsdiffu4(x, y); - return __dp4a(diff, diff, 0u); -} - -/** - * @brief [DEPRECATED] Use squared_diff(x, y) with point wrapper instead. 
- */ -__device__ __forceinline__ int32_t packed_sq_diff_i8(int32_t x, int32_t y) -{ - auto diff = __vabsdiffs4(x, y); - return __dp4a(diff, diff, 0); -} - -/** - * @brief [DEPRECATED] Use abs_diff(x, y) with point wrapper instead. - */ -__device__ __forceinline__ uint32_t packed_l1_u8(uint32_t x, uint32_t y) -{ - auto diff = __vabsdiffu4(x, y); - return (diff & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + ((diff >> 24) & 0xFF); -} - -/** - * @brief [DEPRECATED] Use abs_diff(x, y) with point wrapper instead. - */ -__device__ __forceinline__ int32_t packed_l1_i8(int32_t x, int32_t y) -{ - auto diff = __vabsdiffs4(x, y); - uint32_t udiff = static_cast(diff); - return static_cast((udiff & 0xFF) + ((udiff >> 8) & 0xFF) + ((udiff >> 16) & 0xFF) + - ((udiff >> 24) & 0xFF)); -} - -/** - * @brief [DEPRECATED] Use dot_product(x, y) with point wrapper instead. - */ -__device__ __forceinline__ uint32_t packed_dot_u8(uint32_t x, uint32_t y) -{ - return __dp4a(x, y, 0u); -} - -/** - * @brief [DEPRECATED] Use dot_product(x, y) with point wrapper instead. - */ -__device__ __forceinline__ int32_t packed_dot_i8(int32_t x, int32_t y) -{ - return __dp4a(x, y, 0); -} - -} // namespace cuvs::udf diff --git a/cpp/include/cuvs/udf/point.cuh b/cpp/include/cuvs/udf/point.cuh deleted file mode 100644 index 903205d494..0000000000 --- a/cpp/include/cuvs/udf/point.cuh +++ /dev/null @@ -1,220 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include - -namespace cuvs::udf { - -/** - * @brief Wrapper for vector elements that provides both packed and unpacked access. 
- * - * For float/half: trivial wrapper around scalar values - * For int8/uint8 with Veclen > 1: wraps packed bytes in a 32-bit word - * - * @tparam T Data type (float, __half, int8_t, uint8_t) - * @tparam AccT Storage/accumulator type (float, __half, int32_t, uint32_t) - * @tparam Veclen Vector length (1, 2, 4, 8, 16) - * - * Usage: - * // Helpers deduce Veclen automatically: - * acc += cuvs::udf::squared_diff(x, y); // No template args! - * - * // Array access for custom logic (slower but flexible): - * for (int i = 0; i < x.size(); ++i) { - * acc += x[i] * y[i]; - * } - * - * // Query packing: - * if constexpr (decltype(x)::is_packed()) { ... } - */ -template -struct point { - using element_type = T; - using storage_type = AccT; - static constexpr int veclen = Veclen; - - storage_type data_; - - // ============================================================ - // Constructors - // ============================================================ - - __device__ __host__ point() = default; - __device__ __host__ explicit point(storage_type d) : data_(d) {} - - // ============================================================ - // Raw access (for power users who need intrinsics) - // ============================================================ - - __device__ __forceinline__ storage_type raw() const { return data_; } - __device__ __forceinline__ storage_type& raw() { return data_; } - - // ============================================================ - // Compile-time queries - // ============================================================ - - __device__ __host__ static constexpr int size() - { - // For packed int8/uint8: 4 elements per storage word - if constexpr ((std::is_same_v || std::is_same_v) && Veclen > 1) { - return 4; - } else { - return 1; - } - } - - __device__ __host__ static constexpr bool is_packed() - { - return (std::is_same_v || std::is_same_v) && Veclen > 1; - } - - // ============================================================ - // Element access 
(unpacks for int8/uint8) - // ============================================================ - - __device__ __forceinline__ T operator[](int i) const - { - if constexpr (std::is_same_v && Veclen > 1) { - // Extract signed byte i from packed int32_t - return static_cast((data_ >> (i * 8)) & 0xFF); - } else if constexpr (std::is_same_v && Veclen > 1) { - // Extract unsigned byte i from packed uint32_t - return static_cast((data_ >> (i * 8)) & 0xFF); - } else { - // Scalar types: only one element - (void)i; // Unused - return static_cast(data_); - } - } -}; - -// ============================================================ -// Helper Operations - Deduce Veclen from point type! -// ============================================================ - -/** - * @brief Squared difference: (x - y)² - * - * Optimized for packed int8/uint8, falls back to scalar for float/half. - */ -template -__device__ __forceinline__ AccT squared_diff(point x, point y) -{ - if constexpr (std::is_same_v && V > 1) { - // SIMD: 4 packed unsigned bytes - auto diff = __vabsdiffu4(x.raw(), y.raw()); - return __dp4a(diff, diff, AccT{0}); - } else if constexpr (std::is_same_v && V > 1) { - // SIMD: 4 packed signed bytes - auto diff = __vabsdiffs4(x.raw(), y.raw()); - return __dp4a(diff, diff, static_cast(0)); - } else { - // Scalar: float, half, or byte with Veclen==1 - auto diff = x.raw() - y.raw(); - return diff * diff; - } -} - -/** - * @brief Absolute difference: |x - y| - * - * For packed types, returns sum of absolute differences. 
- */ -template -__device__ __forceinline__ AccT abs_diff(point x, point y) -{ - if constexpr (std::is_same_v && V > 1) { - // SIMD: sum of 4 unsigned absolute differences - auto diff = __vabsdiffu4(x.raw(), y.raw()); - // Sum the 4 bytes - return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + - ((diff >> 24) & 0xFF); - } else if constexpr (std::is_same_v && V > 1) { - // SIMD: sum of 4 signed absolute differences - auto diff = __vabsdiffs4(x.raw(), y.raw()); - return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + - ((diff >> 24) & 0xFF); - } else { - // Scalar - auto a = x.raw(); - auto b = y.raw(); - return (a > b) ? (a - b) : (b - a); - } -} - -/** - * @brief Dot product: x · y - * - * For packed types, computes sum of element-wise products. - */ -template -__device__ __forceinline__ AccT dot_product(point x, point y) -{ - if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { - // SIMD: dp4a computes dot product of 4 packed bytes - return __dp4a(x.raw(), y.raw(), AccT{0}); - } else { - // Scalar - return x.raw() * y.raw(); - } -} - -/** - * @brief Element-wise product: x * y - * - * For packed types, returns sum of element-wise products (same as dot_product). - */ -template -__device__ __forceinline__ AccT product(point x, point y) -{ - return dot_product(x, y); -} - -/** - * @brief Element-wise sum: x + y - */ -template -__device__ __forceinline__ AccT sum(point x, point y) -{ - if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { - // Sum all unpacked elements - AccT result = 0; - for (int i = 0; i < x.size(); ++i) { - result += static_cast(x[i]) + static_cast(y[i]); - } - return result; - } else { - return x.raw() + y.raw(); - } -} - -/** - * @brief Maximum element: max(x, y) - * - * For packed types, returns max across all element pairs. 
- */ -template -__device__ __forceinline__ AccT max_elem(point x, point y) -{ - if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { - AccT result = 0; - for (int i = 0; i < x.size(); ++i) { - auto xi = static_cast(x[i]); - auto yi = static_cast(y[i]); - auto val = (xi > yi) ? xi : yi; - if (val > result) result = val; - } - return result; - } else { - auto a = x.raw(); - auto b = y.raw(); - return (a > b) ? a : b; - } -} - -} // namespace cuvs::udf diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index ea43b987fb..b0a36d2f73 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -45,3 +45,13 @@ void registerFatbinFragment(std::string const& algo, if (entry_exists) { return; } planner.cache[key] = std::make_unique(key, blob, size); } + +void registerNVRTCFragment(std::string const& key, + std::unique_ptr&& program, + std::size_t size) +{ + auto& planner = fragment_database(); + auto entry_exists = planner.make_cache_entry(key); + if (entry_exists) { return; } + planner.cache[key] = std::make_unique(key, std::move(program), size); +} diff --git a/cpp/src/detail/jit_lto/FragmentEntry.cu b/cpp/src/detail/jit_lto/FragmentEntry.cu index 7f0f83ebb9..b726ae7c47 100644 --- a/cpp/src/detail/jit_lto/FragmentEntry.cu +++ b/cpp/src/detail/jit_lto/FragmentEntry.cu @@ -26,3 +26,19 @@ bool FatbinFragmentEntry::add_to(nvJitLinkHandle& handle) const check_nvjitlink_result(handle, result); return true; } + +NVRTCFragmentEntry::NVRTCFragmentEntry(std::string const& key, + std::unique_ptr&& program, + std::size_t size) + : FragmentEntry(key), program(std::move(program)), data_size(size) +{ +} + +bool NVRTCFragmentEntry::add_to(nvJitLinkHandle& handle) const +{ + auto result = nvJitLinkAddData( + handle, NVJITLINK_INPUT_LTOIR, this->program.get(), this->data_size, this->compute_key.c_str()); + check_nvjitlink_result(handle, result); + + return true; +} diff --git 
a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu new file mode 100644 index 0000000000..32ff738fcb --- /dev/null +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -0,0 +1,74 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include + +#include "cuda.h" +#include "nvrtc.h" + +#define NVRTC_SAFE_CALL(_call) \ + do { \ + nvrtcResult result = _call; \ + if (result != NVRTC_SUCCESS) { \ + std::cerr << "\nerror: " #_call " failed with error " << nvrtcGetErrorString(result) \ + << '\n'; \ + exit(1); \ + } \ + } while (0) + +NRTCLTOFragmentCompiler::NRTCLTOFragmentCompiler() +{ + int device = 0; + int major = 0; + int minor = 0; + cudaGetDevice(&device); + cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device); + cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device); + + this->standard_compile_opts.resize(4); + + std::size_t i = 0; + this->standard_compile_opts[i++] = + std::string{"-arch=sm_" + std::to_string((major * 10 + minor))}; + this->standard_compile_opts[i++] = std::string{"-dlto"}; + this->standard_compile_opts[i++] = std::string{"-rdc=true"}; + this->standard_compile_opts[i++] = std::string{"-default-device"}; +} + +void NRTCLTOFragmentCompiler::compile(std::string const& key, std::string const& code) const +{ + nvrtcProgram prog; + NVRTC_SAFE_CALL( + nvrtcCreateProgram(&prog, code.c_str(), "nvrtc_lto_fragment", 0, nullptr, nullptr)); + + nvrtcResult compileResult = nvrtcCompileProgram(prog, // prog + this->standard_compile_opts.size(), // numOptions + this->standard_compile_opts.data()); // options + + if (compileResult != NVRTC_SUCCESS) { + // Obtain compilation log from the program. 
+ size_t log_size; + NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &log_size)); + std::unique_ptr log{new char[log_size]}; + NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log.get())); + std::cerr << "nvrtrc compile error log: \n"; + std::cerr << log.get() << '\n'; + exit(1); + } + + // Obtain generated LTO IR from the program. + std::size_t ltoIRSize; + NVRTC_SAFE_CALL(nvrtcGetLTOIRSize(prog, <oIRSize)); + + std::unique_ptr program = std::make_unique(ltoIRSize); + nvrtcGetLTOIR(prog, program.get()); + + NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); + + registerNVRTCFragment(key, std::move(program), ltoIRSize); +} diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp new file mode 100644 index 0000000000..c56aed94ac --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp @@ -0,0 +1,17 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +namespace cuvs::neighbors::ivf_flat::udf { + +void compile_metric(std::string const& code) +{ + NVRTCLTOFragmentCompiler compiler; + compiler.compile("IVF_FLAT_SEARCH_METRIC_UDF", code); +} + +} // namespace cuvs::neighbors::ivf_flat::udf diff --git a/cpp/src/udf/compiler.cpp b/cpp/src/udf/compiler.cpp deleted file mode 100644 index e57a03077c..0000000000 --- a/cpp/src/udf/compiler.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -/** - * @file compiler.cpp - * @brief Implementation of UDF JIT compiler using NVRTC - * - * This file shows how cuVS implements JIT compilation of user-defined metrics. - * Key responsibility: append the compute_dist wrapper and explicit instantiation - * to the user's struct definition. 
- */ - -#include - -#include -#include -#include - -#include -#include -#include - -namespace cuvs::udf::detail { - -// ============================================================ -// Cache Implementation -// ============================================================ - -size_t cache_key_hash::operator()(const cache_key& k) const -{ - size_t h = 0; - auto hash_combine = [&h](const auto& v) { - h ^= std::hash>{}(v) + 0x9e3779b9 + (h << 6) + (h >> 2); - }; - - hash_combine(k.source_hash); - hash_combine(k.struct_name); - hash_combine(k.veclen); - hash_combine(k.data_type); - hash_combine(k.acc_type); - hash_combine(k.compute_capability); - - return h; -} - -udf_cache& udf_cache::instance() -{ - static udf_cache cache; - return cache; -} - -std::shared_ptr udf_cache::get(const cache_key& key) -{ - std::lock_guard lock(mutex_); - auto it = cache_.find(key); - if (it != cache_.end()) { return it->second; } - return nullptr; -} - -void udf_cache::put(const cache_key& key, std::shared_ptr fragment) -{ - std::lock_guard lock(mutex_); - cache_[key] = std::move(fragment); -} - -void udf_cache::clear() -{ - std::lock_guard lock(mutex_); - cache_.clear(); -} - -// ============================================================ -// Source Building - THE KEY FUNCTION -// ============================================================ - -std::string build_full_source(const metric_source& udf, - int veclen, - const std::string& data_type, - const std::string& acc_type) -{ - std::stringstream ss; - - // 1. Standard includes - ss << "#include \n"; - ss << "#include \n"; - ss << "#include \n\n"; - - // 2. Include the point wrapper and metric interface - ss << "#include \n"; - ss << "#include \n\n"; - - // 3. Open namespace - ss << "namespace cuvs::neighbors::ivf_flat::detail {\n\n"; - - // 4. User's struct definition (from metric_source.source) - // This is ONLY the struct - no wrapper, no instantiation - ss << "// User-defined metric struct\n"; - ss << udf.source << "\n\n"; - - // 5. 
cuVS adds the compute_dist wrapper function - // This calls the user's struct with point-wrapped arguments - ss << "// cuVS-generated wrapper function\n"; - ss << "template \n"; - ss << "__device__ void compute_dist(AccT& acc, AccT x_raw, AccT y_raw) {\n"; - ss << " // Wrap raw values in point\n"; - ss << " using point_t = cuvs::udf::point;\n"; - ss << " point_t x{x_raw};\n"; - ss << " point_t y{y_raw};\n"; - ss << " " << udf.struct_name << "{}(acc, x, y);\n"; - ss << "}\n\n"; - - // 6. cuVS adds the explicit instantiation - // Based on index.veclen() and index.data_type() - ss << "// cuVS-generated explicit instantiation\n"; - ss << "template __device__ void compute_dist<" << veclen << ", " << data_type << ", " << acc_type - << ">(" << acc_type << "&, " << acc_type << ", " << acc_type << ");\n\n"; - - // 7. Close namespace - ss << "} // namespace cuvs::neighbors::ivf_flat::detail\n"; - - return ss.str(); -} - -// ============================================================ -// Hash helper -// ============================================================ - -static std::string compute_source_hash(const std::string& source) -{ - std::hash hasher; - return std::to_string(hasher(source)); -} - -// ============================================================ -// NVRTC Error Checking -// ============================================================ - -static void check_nvrtc(nvrtcResult result, const char* msg) -{ - if (result != NVRTC_SUCCESS) { - std::stringstream ss; - ss << msg << ": " << nvrtcGetErrorString(result); - throw compilation_error(ss.str()); - } -} - -// ============================================================ -// Main Compilation Function -// ============================================================ - -std::shared_ptr compile_metric(const metric_source& udf, - int veclen, - const std::string& data_type, - const std::string& acc_type) -{ - // 1. 
Get device compute capability - int device; - cudaGetDevice(&device); - cudaDeviceProp props; - cudaGetDeviceProperties(&props, device); - int cc = props.major * 10 + props.minor; - - // 2. Check cache first - cache_key key{.source_hash = compute_source_hash(udf.source), - .struct_name = udf.struct_name, - .veclen = veclen, - .data_type = data_type, - .acc_type = acc_type, - .compute_capability = cc}; - - auto& cache = udf_cache::instance(); - if (auto cached = cache.get(key)) { return cached; } - - // 3. Build full source (user struct + wrapper + instantiation) - std::string full_source = build_full_source(udf, veclen, data_type, acc_type); - - // 4. Prepare headers for NVRTC (include point.cuh and metric_interface.cuh) - std::vector header_names; - std::vector header_contents; - - for (const auto& [name, content] : udf.headers) { - header_names.push_back(name.c_str()); - header_contents.push_back(content.c_str()); - } - - // 5. Create NVRTC program - nvrtcProgram prog; - check_nvrtc(nvrtcCreateProgram(&prog, - full_source.c_str(), - "udf_metric.cu", - static_cast(header_names.size()), - header_contents.data(), - header_names.data()), - "Failed to create NVRTC program"); - - // 6. Compile options for LTO - std::string arch_opt = "--gpu-architecture=compute_" + std::to_string(cc); - - const char* opts[] = { - arch_opt.c_str(), - "-dlto", // Generate LTO-IR - "--relocatable-device-code=true", - "-std=c++17", - "-default-device", - }; - - nvrtcResult compile_result = nvrtcCompileProgram(prog, 5, opts); - - // 7. Get compilation log - size_t log_size; - nvrtcGetProgramLogSize(prog, &log_size); - - std::string log; - if (log_size > 1) { - log.resize(log_size); - nvrtcGetProgramLog(prog, log.data()); - } - - if (compile_result != NVRTC_SUCCESS) { - nvrtcDestroyProgram(&prog); - throw compilation_error("UDF compilation failed:\n" + log); - } - - // 8. 
Get LTO-IR - size_t lto_size; - check_nvrtc(nvrtcGetLTOIRSize(prog, <o_size), "Failed to get LTO-IR size"); - - auto fragment = std::make_shared(); - fragment->lto_ir.resize(lto_size); - check_nvrtc(nvrtcGetLTOIR(prog, fragment->lto_ir.data()), "Failed to get LTO-IR"); - - nvrtcDestroyProgram(&prog); - - // 9. Cache and return - cache.put(key, fragment); - - return fragment; -} - -} // namespace cuvs::udf::detail From e1627d17409bc00c3effce4da2c0df2d66106703 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 30 Jan 2026 00:24:58 +0000 Subject: [PATCH 074/158] add udf to cmakelists --- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 077e871860..e31792ed7d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -364,6 +364,7 @@ if(NOT BUILD_CPU_ONLY) src/detail/jit_lto/FragmentEntry.cu src/detail/jit_lto/nvjitlink_checker.cpp src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu + src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp ) endif() From f7ea581397deef933672b5efc19d838c56784b2e Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 30 Jan 2026 23:50:23 +0000 Subject: [PATCH 075/158] udfs working e2e --- cpp/CMakeLists.txt | 4 +- .../cuvs/detail/jit_lto/FragmentDatabase.h | 12 +- .../cuvs/detail/jit_lto/FragmentEntry.h | 2 +- .../jit_lto/NVRTCLTOFragmentCompiler.hpp | 6 +- .../ivf_flat/interleaved_scan_tags.hpp | 3 + cpp/include/cuvs/distance/distance.hpp | 6 +- cpp/include/cuvs/neighbors/ivf_flat.hpp | 591 ++++++++++-------- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 2 + .../jit_lto/NVRTCLTOFragmentCompiler.cu | 19 +- .../ivf_flat/ivf_flat_interleaved_scan.cuh | 3 +- ...vf_flat_interleaved_scan_explicit_inst.cuh | 1 + .../ivf_flat_interleaved_scan_ext.cuh | 4 +- .../ivf_flat_interleaved_scan_jit.cuh | 68 +- .../neighbors/ivf_flat/ivf_flat_search.cuh | 13 +- .../ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp | 17 - cpp/src/neighbors/ivf_flat_index.cpp | 8 +- 
cpp/src/neighbors/refine/refine_device.cuh | 3 +- cpp/tests/CMakeLists.txt | 7 + cpp/tests/neighbors/ann_ivf_flat/test_udf.cu | 325 ++++++++++ examples/cpp/src/udf_chebyshev_metric.cu | 93 --- examples/cpp/src/udf_int8_metric.cu | 117 ---- examples/cpp/src/udf_simple_metric.cu | 98 --- examples/cpp/src/udf_weighted_metric.cu | 133 ---- 23 files changed, 787 insertions(+), 748 deletions(-) delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp create mode 100644 cpp/tests/neighbors/ann_ivf_flat/test_udf.cu delete mode 100644 examples/cpp/src/udf_chebyshev_metric.cu delete mode 100644 examples/cpp/src/udf_int8_metric.cu delete mode 100644 examples/cpp/src/udf_simple_metric.cu delete mode 100644 examples/cpp/src/udf_weighted_metric.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e31792ed7d..1fe15f0610 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -364,7 +364,6 @@ if(NOT BUILD_CPU_ONLY) src/detail/jit_lto/FragmentEntry.cu src/detail/jit_lto/nvjitlink_checker.cpp src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu - src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp ) endif() @@ -606,6 +605,7 @@ if(NOT BUILD_CPU_ONLY) ${CUVS_CTK_MATH_DEPENDENCIES} $ $ + $<$:CUDA::nvrtc> ) target_include_directories( @@ -689,6 +689,7 @@ if(NOT BUILD_CPU_ONLY) $ $ $<$:CUDA::nvJitLink> + $<$:CUDA::nvrtc> $<$:$> ) @@ -749,6 +750,7 @@ SECTIONS PRIVATE $ $<$:CUDA::nvJitLink> + $<$:CUDA::nvrtc> $<$:CUDA::nvtx3> $ $ diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h index 0e9c63da54..9d3849eaef 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h @@ -34,11 +34,9 @@ class FragmentDatabase { unsigned char const* blob, std::size_t size); - friend void registerNVRTCFragment(std::string const& algo, - std::string const& params, - std::string const& mname, - std::size_t size, - std::unique_ptr&& program); + friend 
void registerNVRTCFragment(std::string const& key, + std::unique_ptr&& program, + std::size_t size); std::unordered_map> cache; }; @@ -51,5 +49,5 @@ void registerFatbinFragment(std::string const& algo, std::size_t size); void registerNVRTCFragment(std::string const& key, - std::size_t size, - std::unique_ptr&& program); + std::unique_ptr&& program, + std::size_t size); diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h index 4d428e3262..3bbe7d31a8 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h @@ -32,7 +32,7 @@ struct FatbinFragmentEntry final : FragmentEntry { }; struct NVRTCFragmentEntry final : FragmentEntry { - NVRTCFragmentEntry(std::string const& key, std::unique_ptr&& program std::size_t size); + NVRTCFragmentEntry(std::string const& key, std::unique_ptr&& program, std::size_t size); virtual bool add_to(nvJitLinkHandle& handle) const; diff --git a/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp b/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp index a3a0988949..3c14fbd425 100644 --- a/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp +++ b/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp @@ -5,13 +5,11 @@ #pragma once -#include #include -#include #include -struct NRTCLTOFragmentCompiler { - NRTCLTOFragmentCompiler(); +struct NVRTCLTOFragmentCompiler { + NVRTCLTOFragmentCompiler(); void compile(std::string const& key, std::string const& code) const; diff --git a/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp b/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp index d9ed7e6b0b..19ded7e8ad 100644 --- a/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp +++ b/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp @@ -37,6 +37,9 @@ struct tag_metric_euclidean {}; template struct tag_metric_inner_product {}; +template +struct 
tag_metric_custom_udf {}; + // Tag types for post-processing struct tag_post_identity {}; struct tag_post_sqrt {}; diff --git a/cpp/include/cuvs/distance/distance.hpp b/cpp/include/cuvs/distance/distance.hpp index 13c8c7bd7e..df7d45c8a6 100644 --- a/cpp/include/cuvs/distance/distance.hpp +++ b/cpp/include/cuvs/distance/distance.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -59,7 +59,9 @@ enum class DistanceType : int { /** Bitstring Hamming distance **/ BitwiseHamming = 20, /** Precomputed (special value) **/ - Precomputed = 100 + Precomputed = 100, + /** Custom metric UDF **/ + CustomUDF = 101 }; /** diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index 77a2381f9e..56162c0d67 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -71,6 +71,8 @@ struct index_params : cuvs::neighbors::index_params { struct search_params : cuvs::neighbors::search_params { /** The number of clusters to search. */ uint32_t n_probes = 20; + /** Custom metric UDF code. */ + std::optional metric_udf = std::nullopt; }; static_assert(std::is_aggregate_v); @@ -167,6 +169,8 @@ struct index : cuvs::neighbors::index { /** Distance metric used for clustering. */ cuvs::distance::DistanceType metric() const noexcept; + void set_metric(cuvs::distance::DistanceType metric); + /** Whether `centers()` change upon extending the index (ivf_flat::extend). 
*/ bool adaptive_centers() const noexcept; @@ -3038,6 +3042,10 @@ void recompute_internal_state(const raft::resources& res, index - struct point { - using element_type = T; - using storage_type = AccT; - static constexpr int veclen = Veclen; - - storage_type data_; - - // ============================================================ - // Constructors - // ============================================================ - - __device__ __host__ point() = default; - __device__ __host__ explicit point(storage_type d) : data_(d) {} - - // ============================================================ - // Raw access (for power users who need intrinsics) - // ============================================================ - - __device__ __forceinline__ storage_type raw() const { return data_; } - __device__ __forceinline__ storage_type& raw() { return data_; } - - // ============================================================ - // Compile-time queries - // ============================================================ - - __device__ __host__ static constexpr int size() - { - // For packed int8/uint8: 4 elements per storage word - if constexpr ((std::is_same_v || std::is_same_v) && Veclen > 1) { - return 4; - } else { - return 1; - } - } - - __device__ __host__ static constexpr bool is_packed() - { - return (std::is_same_v || std::is_same_v) && Veclen > 1; - } - - // ============================================================ - // Element access (unpacks for int8/uint8) - // ============================================================ - - __device__ __forceinline__ T operator[](int i) const - { - if constexpr (std::is_same_v && Veclen > 1) { - // Extract signed byte i from packed int32_t - return static_cast((data_ >> (i * 8)) & 0xFF); - } else if constexpr (std::is_same_v && Veclen > 1) { - // Extract unsigned byte i from packed uint32_t - return static_cast((data_ >> (i * 8)) & 0xFF); - } else { - // Scalar types: only one element - (void)i; // Unused - return static_cast(data_); 
- } - } - }; - )"; + */ +template +struct point { + using element_type = T; + using storage_type = AccT; + static constexpr int veclen = Veclen; + + storage_type data_; + + __device__ __host__ point() = default; + __device__ __host__ explicit point(storage_type d) : data_(d) {} + + __device__ __forceinline__ storage_type raw() const { return data_; } + __device__ __forceinline__ storage_type& raw() { return data_; } + + __device__ __host__ static constexpr int size() + { + if constexpr ((std::is_same_v || std::is_same_v) && Veclen > 1) { + return 4; + } else { + return 1; + } + } + + __device__ __host__ static constexpr bool is_packed() + { + return (std::is_same_v || std::is_same_v) && Veclen > 1; + } + + __device__ __forceinline__ T operator[](int i) const + { + if constexpr (std::is_same_v && Veclen > 1) { + return static_cast((data_ >> (i * 8)) & 0xFF); + } else if constexpr (std::is_same_v && Veclen > 1) { + return static_cast((data_ >> (i * 8)) & 0xFF); + } else { + (void)i; + return static_cast(data_); + } + } +}; + +/** + * @brief Base interface for custom distance metrics. + */ +template +struct metric_interface { + using point_type = point; + + virtual __device__ void operator()(AccT& acc, point_type x, point_type y) = 0; + virtual __device__ ~metric_interface() = default; +}; + // ============================================================ // Helper Operations - Deduce Veclen from point type! 
// ============================================================ +/** @brief Squared difference: (x - y)² */ +template +__device__ __forceinline__ AccT squared_diff(point x, point y) +{ + if constexpr (std::is_same_v && V > 1) { + auto diff = __vabsdiffu4(x.raw(), y.raw()); + return __dp4a(diff, diff, AccT{0}); + } else if constexpr (std::is_same_v && V > 1) { + auto diff = __vabsdiffs4(x.raw(), y.raw()); + return __dp4a(diff, diff, static_cast(0)); + } else { + auto diff = x.raw() - y.raw(); + return diff * diff; + } +} + +/** @brief Absolute difference: |x - y| */ +template +__device__ __forceinline__ AccT abs_diff(point x, point y) +{ + if constexpr (std::is_same_v && V > 1) { + auto diff = __vabsdiffu4(x.raw(), y.raw()); + return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + + ((diff >> 24) & 0xFF); + } else if constexpr (std::is_same_v && V > 1) { + auto diff = __vabsdiffs4(x.raw(), y.raw()); + return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + + ((diff >> 24) & 0xFF); + } else { + auto a = x.raw(); + auto b = y.raw(); + return (a > b) ? 
(a - b) : (b - a); + } +} + +/** @brief Dot product: x · y */ +template +__device__ __forceinline__ AccT dot_product(point x, point y) +{ + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + return __dp4a(x.raw(), y.raw(), AccT{0}); + } else { + return x.raw() * y.raw(); + } +} + +/** @brief Element-wise product: x * y */ +template +__device__ __forceinline__ AccT product(point x, point y) +{ + return dot_product(x, y); +} + +/** @brief Element-wise sum: x + y */ +template +__device__ __forceinline__ AccT sum(point x, point y) +{ + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + AccT result = 0; + for (int i = 0; i < x.size(); ++i) { + result += static_cast(x[i]) + static_cast(y[i]); + } + return result; + } else { + return x.raw() + y.raw(); + } +} + +/** @brief Maximum element: max(x, y) */ +template +__device__ __forceinline__ AccT max_elem(point x, point y) +{ + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + AccT result = 0; + for (int i = 0; i < x.size(); ++i) { + auto xi = static_cast(x[i]); + auto yi = static_cast(y[i]); + auto val = (xi > yi) ? xi : yi; + if (val > result) result = val; + } + return result; + } else { + auto a = x.raw(); + auto b = y.raw(); + return (a > b) ? 
a : b; + } +} + +// ============================================================================ +// String versions for JIT compilation +// ============================================================================ + +constexpr std::string_view point_code = R"( +template +struct point { + using element_type = T; + using storage_type = AccT; + static constexpr int veclen = Veclen; + + storage_type data_; + + __device__ __host__ point() = default; + __device__ __host__ explicit point(storage_type d) : data_(d) {} + + __device__ __forceinline__ storage_type raw() const { return data_; } + __device__ __forceinline__ storage_type& raw() { return data_; } + + __device__ __host__ static constexpr int size() + { + if constexpr ((std::is_same_v || std::is_same_v) && Veclen > 1) { + return 4; + } else { + return 1; + } + } + + __device__ __host__ static constexpr bool is_packed() + { + return (std::is_same_v || std::is_same_v) && Veclen > 1; + } + + __device__ __forceinline__ T operator[](int i) const + { + if constexpr (std::is_same_v && Veclen > 1) { + return static_cast((data_ >> (i * 8)) & 0xFF); + } else if constexpr (std::is_same_v && Veclen > 1) { + return static_cast((data_ >> (i * 8)) & 0xFF); + } else { + (void)i; + return static_cast(data_); + } + } +}; +)"; + +constexpr std::string_view metric_interface_code = R"( +template +struct metric_interface { + using point_type = point; + + virtual __device__ void operator()(AccT& acc, point_type x, point_type y) = 0; + virtual __device__ ~metric_interface() = default; +}; +)"; + +constexpr std::string_view squared_diff_code = R"( +template +__device__ __forceinline__ AccT squared_diff(point x, point y) +{ + if constexpr (std::is_same_v && V > 1) { + auto diff = __vabsdiffu4(x.raw(), y.raw()); + return __dp4a(diff, diff, AccT{0}); + } else if constexpr (std::is_same_v && V > 1) { + auto diff = __vabsdiffs4(x.raw(), y.raw()); + return __dp4a(diff, diff, static_cast(0)); + } else { + auto diff = x.raw() - y.raw(); + 
return diff * diff; + } +} +)"; + +constexpr std::string_view abs_diff_code = R"( +template +__device__ __forceinline__ AccT abs_diff(point x, point y) +{ + if constexpr (std::is_same_v && V > 1) { + auto diff = __vabsdiffu4(x.raw(), y.raw()); + return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + + ((diff >> 24) & 0xFF); + } else if constexpr (std::is_same_v && V > 1) { + auto diff = __vabsdiffs4(x.raw(), y.raw()); + return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + + ((diff >> 24) & 0xFF); + } else { + auto a = x.raw(); + auto b = y.raw(); + return (a > b) ? (a - b) : (b - a); + } +} +)"; + +constexpr std::string_view dot_product_code = R"( +template +__device__ __forceinline__ AccT dot_product(point x, point y) +{ + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + return __dp4a(x.raw(), y.raw(), AccT{0}); + } else { + return x.raw() * y.raw(); + } +} +)"; + +constexpr std::string_view product_code = R"( +template +__device__ __forceinline__ AccT product(point x, point y) +{ + return dot_product(x, y); +} +)"; + +constexpr std::string_view sum_code = R"( +template +__device__ __forceinline__ AccT sum(point x, point y) +{ + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + AccT result = 0; + for (int i = 0; i < x.size(); ++i) { + result += static_cast(x[i]) + static_cast(y[i]); + } + return result; + } else { + return x.raw() + y.raw(); + } +} +)"; + +constexpr std::string_view max_elem_code = R"( +template +__device__ __forceinline__ AccT max_elem(point x, point y) +{ + if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { + AccT result = 0; + for (int i = 0; i < x.size(); ++i) { + auto xi = static_cast(x[i]); + auto yi = static_cast(y[i]); + auto val = (xi > yi) ? xi : yi; + if (val > result) result = val; + } + return result; + } else { + auto a = x.raw(); + auto b = y.raw(); + return (a > b) ? 
a : b; + } +} +)"; + /** - * @brief Squared difference: (x - y)² - * - * Optimized for packed int8/uint8, falls back to scalar for float/half. - */ -const std::string_view squared_diff_code = R"( - template - __device__ __forceinline__ AccT squared_diff(point x, point y) - { - if constexpr (std::is_same_v && V > 1) { - // SIMD: 4 packed unsigned bytes - auto diff = __vabsdiffu4(x.raw(), y.raw()); - return __dp4a(diff, diff, AccT{0}); - } else if constexpr (std::is_same_v && V > 1) { - // SIMD: 4 packed signed bytes - auto diff = __vabsdiffs4(x.raw(), y.raw()); - return __dp4a(diff, diff, static_cast(0)); - } else { - // Scalar: float, half, or byte with Veclen==1 - auto diff = x.raw() - y.raw(); - return diff * diff; - } - } - )"; -/** - * @brief Absolute difference: |x - y| - * - * For packed types, returns sum of absolute differences. - */ -const std::string_view abs_diff_code = R"( - template - __device__ __forceinline__ AccT abs_diff(point x, point y) - { - if constexpr (std::is_same_v && V > 1) { - // SIMD: sum of 4 unsigned absolute differences - auto diff = __vabsdiffu4(x.raw(), y.raw()); - // Sum the 4 bytes - return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + - ((diff >> 24) & 0xFF); - } else if constexpr (std::is_same_v && V > 1) { - // SIMD: sum of 4 signed absolute differences - auto diff = __vabsdiffs4(x.raw(), y.raw()); - return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + - ((diff >> 24) & 0xFF); - } else { - // Scalar - auto a = x.raw(); - auto b = y.raw(); - return (a > b) ? (a - b) : (b - a); - } - } - )"; -/** - * @brief Dot product: x · y - * - * For packed types, computes sum of element-wise products. 
- */ -const std::string_view dot_product_code = R"( - template - __device__ __forceinline__ AccT dot_product(point x, point y) - { - if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { - // SIMD: dp4a computes dot product of 4 packed bytes - return __dp4a(x.raw(), y.raw(), AccT{0}); - } else { - // Scalar - return x.raw() * y.raw(); - } - } - )"; -/** - * @brief Element-wise product: x * y - * - * For packed types, returns sum of element-wise products (same as dot_product). - */ -const std::string_view product_code = R"( - template - __device__ __forceinline__ AccT product(point x, point y) - { - return dot_product(x, y); - } - )"; -/** - * @brief Element-wise sum: x + y - */ -const std::string_view sum_code = R"( - template - __device__ __forceinline__ AccT sum(point x, point y) - { - if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { - // Sum all unpacked elements - AccT result = 0; - for (int i = 0; i < x.size(); ++i) { - result += static_cast(x[i]) + static_cast(y[i]); - } - return result; - } else { - return x.raw() + y.raw(); - } - } - )"; -/** - * @brief Maximum element: max(x, y) - * - * For packed types, returns max across all element pairs. - */ -const std::string_view max_elem_code = R"( - template - __device__ __forceinline__ AccT max_elem(point x, point y) - { - if constexpr ((std::is_same_v || std::is_same_v) && V > 1) { - AccT result = 0; - for (int i = 0; i < x.size(); ++i) { - auto xi = static_cast(x[i]); - auto yi = static_cast(y[i]); - auto val = (xi > yi) ? xi : yi; - if (val > result) result = val; - } - return result; - } else { - auto a = x.raw(); - auto b = y.raw(); - return (a > b) ? a : b; - } - } - )"; -/** - * @brief Base interface for custom distance metrics. - * - * Inherit from this interface to get compile-time enforcement of the - * correct operator() signature via the `override` keyword. 
- * - * If you forget to implement operator() or use the wrong signature, - * you'll get a clear compile error: "does not override any member function" + * @brief Preamble code for JIT compilation. * - * @tparam T Data type (float, __half, int8_t, uint8_t) - * @tparam AccT Accumulator type (float, __half, int32_t, uint32_t) - * @tparam Veclen Vector length (handled by cuVS internally) - * - * @note x and y are point which provides: - * - .raw() : packed storage for power users - * - operator[] : unpacked element access - * - ::veclen : compile-time Veclen - * - ::is_packed() : whether data is packed - */ -const std::string_view metric_interface_code = R"( - template - struct metric_interface { - using point_type = point; - - /** - * @brief Compute distance contribution for one element pair. - * - * @param[in,out] acc Accumulated distance value - * @param[in] x Query vector element (point wrapper) - * @param[in] y Database vector element (point wrapper) - * - * Example: - * // Simple - use helpers (recommended): - * acc += squared_diff(x, y); - * - * // Array access for custom logic: - * for (int i = 0; i < x.size(); ++i) { - * acc += x[i] * y[i]; - * } - * - * // Power user - raw access: - * if constexpr (point_type::is_packed()) { - * acc = __dp4a(x.raw(), y.raw(), acc); - * } - */ - virtual __device__ void operator()(AccT& acc, point_type x, point_type y) = 0; - - virtual __device__ ~metric_interface() = default; - }; - )"; + * nvrtc doesn't have access to standard library headers, so we define + * the necessary types and utilities inline. 
+ */ +constexpr std::string_view jit_preamble_code = R"( +/* Fixed-width integer types for nvrtc */ +using int8_t = signed char; +using uint8_t = unsigned char; +using int32_t = int; +using uint32_t = unsigned int; +using int64_t = long long; +using uint64_t = unsigned long long; + +/* std::is_same_v implementation for nvrtc */ +namespace std { +template struct is_same { static constexpr bool value = false; }; +template struct is_same { static constexpr bool value = true; }; +template inline constexpr bool is_same_v = is_same::value; +} +)"; /** * @brief Define a custom distance metric with compile-time validation. @@ -3327,14 +3398,14 @@ const std::string_view metric_interface_code = R"( * x.is_packed() - Whether data is packed (constexpr) * * Helper functions (Veclen deduced automatically!): - * cuvs::udf::squared_diff(x, y) - (x-y)² optimized for all types - * cuvs::udf::abs_diff(x, y) - |x-y| optimized for all types - * cuvs::udf::dot_product(x, y) - x·y optimized for all types - * cuvs::udf::product(x, y) - element-wise product + * squared_diff(x, y) - (x-y)² optimized for all types + * abs_diff(x, y) - |x-y| optimized for all types + * dot_product(x, y) - x·y optimized for all types + * product(x, y) - element-wise product * * Example: * CUVS_METRIC(my_l2, { - * acc += cuvs::udf::squared_diff(x, y); // Just works for all types! + * acc += squared_diff(x, y); // Just works for all types! 
* }) * * CUVS_METRIC(my_chebyshev, { @@ -3346,8 +3417,8 @@ const std::string_view metric_interface_code = R"( */ #define CUVS_METRIC(NAME, BODY) \ template \ - struct NAME : cuvs::udf::metric_interface { \ - using point_type = cuvs::udf::point; \ + struct NAME : cuvs::neighbors::ivf_flat::udf::metric_interface { \ + using point_type = cuvs::neighbors::ivf_flat::udf::point; \ __device__ void operator()(AccT& acc, point_type x, point_type y) override { BODY } \ }; \ \ @@ -3355,6 +3426,7 @@ const std::string_view metric_interface_code = R"( { \ using namespace cuvs::neighbors::ivf_flat::udf; \ std::string result; \ + result += jit_preamble_code; \ result += point_code; \ result += squared_diff_code; \ result += abs_diff_code; \ @@ -3371,17 +3443,18 @@ struct )" #NAME R"( : metric_interface { )" #BODY R"( \ }; \ \ +namespace cuvs { namespace neighbors { namespace ivf_flat { namespace detail { \ template \ __device__ void compute_dist(AccT& acc, AccT x, AccT y) \ { \ - )" #NAME R"( metric; \ - metric(acc, point(x), point(y)); \ + ::)" #NAME R"( metric; \ + metric(acc, ::point(x), ::point(y)); \ } \ +}}}} \ )"; \ return result; \ } -void compile_metric(std::string const& code); } // namespace udf } // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 693e2ee685..d28110c370 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -14,6 +14,7 @@ #include #include +#include #include "cuda_runtime.h" #include "nvJitLink.h" @@ -73,6 +74,7 @@ std::shared_ptr AlgorithmPlanner::build() check_nvjitlink_result(handle, result); for (auto& frag : this->fragments) { + std::cout << "Adding fragment: " << frag->compute_key << std::endl; frag->add_to(handle); } diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index 32ff738fcb..f0af0369a3 100644 --- 
a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -9,7 +9,7 @@ #include #include "cuda.h" -#include "nvrtc.h" +#include #define NVRTC_SAFE_CALL(_call) \ do { \ @@ -21,7 +21,7 @@ } \ } while (0) -NRTCLTOFragmentCompiler::NRTCLTOFragmentCompiler() +NVRTCLTOFragmentCompiler::NVRTCLTOFragmentCompiler() { int device = 0; int major = 0; @@ -40,15 +40,22 @@ NRTCLTOFragmentCompiler::NRTCLTOFragmentCompiler() this->standard_compile_opts[i++] = std::string{"-default-device"}; } -void NRTCLTOFragmentCompiler::compile(std::string const& key, std::string const& code) const +void NVRTCLTOFragmentCompiler::compile(std::string const& key, std::string const& code) const { nvrtcProgram prog; NVRTC_SAFE_CALL( nvrtcCreateProgram(&prog, code.c_str(), "nvrtc_lto_fragment", 0, nullptr, nullptr)); - nvrtcResult compileResult = nvrtcCompileProgram(prog, // prog - this->standard_compile_opts.size(), // numOptions - this->standard_compile_opts.data()); // options + // Convert std::vector to std::vector for nvrtc API + std::vector opts; + opts.reserve(this->standard_compile_opts.size()); + for (const auto& opt : this->standard_compile_opts) { + opts.push_back(opt.c_str()); + } + + nvrtcResult compileResult = nvrtcCompileProgram(prog, // prog + opts.size(), // numOptions + opts.data()); // options if (compileResult != NVRTC_SUCCESS) { // Obtain compilation log from the program. diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index 4c0bb3644a..1324a1d41a 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -1293,6 +1293,7 @@ struct select_interleaved_scan_kernel { */ template void ivfflat_interleaved_scan(const index& index, + const search_params& params, const T* queries, const uint32_t* coarse_query_results, const uint32_t n_queries, diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh index 81833a63b1..cfb4982adf 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_explicit_inst.cuh @@ -23,6 +23,7 @@ typename cuvs::spatial::knn::detail::utils::config::value_t, \ IdxT, \ SampleFilterT>(const index& index, \ + const search_params& params, \ const T* queries, \ const uint32_t* coarse_query_results, \ const uint32_t n_queries, \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_ext.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_ext.cuh index 1d63c52adb..e293362327 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_ext.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_ext.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -17,6 +17,7 @@ namespace cuvs::neighbors::ivf_flat::detail { template void ivfflat_interleaved_scan(const index& index, + const search_params& params, const T* queries, const uint32_t* coarse_query_results, const uint32_t n_queries, @@ -39,6 +40,7 @@ void ivfflat_interleaved_scan(const index& index, typename cuvs::spatial::knn::detail::utils::config::value_t, \ IdxT, \ SampleFilterT>(const index& index, \ + const search_params& params, \ const T* queries, \ const uint32_t* coarse_query_results, \ const uint32_t n_queries, \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index be8652dd59..8dc735f03e 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -8,6 +8,7 @@ #include "../ivf_common.cuh" #include "jit_lto_kernels/interleaved_scan_planner.hpp" #include +#include #include #include #include @@ -52,6 +53,19 @@ constexpr auto get_idx_type_tag() if constexpr (std::is_same_v) { return tag_idx_l{}; } } +// Convert type to string for JIT code generation +template +constexpr const char* type_name() +{ + if constexpr (std::is_same_v) { return "float"; } + if constexpr (std::is_same_v) { return "__half"; } + if constexpr (std::is_same_v) { return "int8_t"; } + if constexpr (std::is_same_v) { return "uint8_t"; } + if constexpr (std::is_same_v) { return "int32_t"; } + if constexpr (std::is_same_v) { return "uint32_t"; } + if constexpr (std::is_same_v) { return "int64_t"; } +} + template constexpr auto get_filter_type_tag() { @@ -71,9 +85,10 @@ constexpr auto get_metric_name() { if constexpr (std::is_same_v>) { return "euclidean"; - } - if constexpr (std::is_same_v>) { + } else if constexpr (std::is_same_v>) { return "inner_prod"; + } else if constexpr (std::is_same_v>) { + return "metric_udf"; } } @@ -128,6 +143,7 @@ template void launch_kernel(const 
index& index, + const search_params& params, const T* queries, const uint32_t* coarse_index, const uint32_t num_queries, @@ -153,9 +169,38 @@ void launch_kernel(const index& index, decltype(get_acc_type_tag()), decltype(get_idx_type_tag())>( Capacity, Veclen, Ascending, ComputeNorm); - kernel_planner.template add_metric_device_function()), - decltype(get_acc_type_tag())>( - get_metric_name(), Veclen); + if (params.metric_udf.has_value()) { + std::string metric_udf = params.metric_udf.value(); + // Add explicit template instantiation with actual types + metric_udf += "\ntemplate void cuvs::neighbors::ivf_flat::detail::compute_dist<"; + metric_udf += std::to_string(Veclen); + metric_udf += ", "; + metric_udf += type_name(); + metric_udf += ", "; + metric_udf += type_name(); + metric_udf += ">("; + metric_udf += type_name(); + metric_udf += "&, "; + metric_udf += type_name(); + metric_udf += ", "; + metric_udf += type_name(); + metric_udf += ");\n"; + // Include hash of UDF source in key to differentiate different UDFs + auto udf_hash = std::to_string(std::hash{}(metric_udf)); + std::string metric_name = "metric_udf_" + udf_hash; + NVRTCLTOFragmentCompiler nrtc_lto_compiler; + std::string key = + metric_name + "_" + std::to_string(Veclen) + "_" + + make_fragment_key()), decltype(get_acc_type_tag())>(); + nrtc_lto_compiler.compile(key, metric_udf); + kernel_planner.template add_metric_device_function()), + decltype(get_acc_type_tag())>( + metric_name, Veclen); + } else { + kernel_planner.template add_metric_device_function()), + decltype(get_acc_type_tag())>( + get_metric_name(), Veclen); + } kernel_planner.add_filter_device_function(get_filter_name()); kernel_planner.add_post_lambda_device_function(get_post_lambda_name()); auto kernel_launcher = kernel_planner.get_launcher(); @@ -289,6 +334,17 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... 
arg tag_post_compose>( std::forward(args)...); // NB: update the description of `knn::ivf_flat::build` when // adding here a new metric. + case cuvs::distance::DistanceType::CustomUDF: + return launch_kernel, + tag_post_identity>(std::forward(args)...); default: RAFT_FAIL("The chosen distance metric is not supported (%d)", int(metric)); } } @@ -390,6 +446,7 @@ struct select_interleaved_scan_kernel { */ template void ivfflat_interleaved_scan(const index& index, + const search_params& params, const T* queries, const uint32_t* coarse_query_results, const uint32_t n_queries, @@ -424,6 +481,7 @@ void ivfflat_interleaved_scan(const index& index, select_min, metric, index, + params, queries, coarse_query_results, n_queries, diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh index 03de3eb791..7d84ece485 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -26,6 +26,8 @@ #include +#include + namespace cuvs::neighbors::ivf_flat::detail { using namespace cuvs::spatial::knn::detail; // NOLINT @@ -38,6 +40,7 @@ auto RAFT_WEAK_FUNCTION is_local_topk_feasible(uint32_t k) -> bool template void search_impl(raft::resources const& handle, const cuvs::neighbors::ivf_flat::index& index, + const search_params& params, const T* queries, uint32_t n_queries, uint32_t queries_offset, @@ -184,6 +187,7 @@ void search_impl(raft::resources const& handle, // query the gridDimX size to store probes topK output ivfflat_interleaved_scan::value_t, IdxT, IvfSampleFilterT>( index, + params, nullptr, nullptr, n_queries, @@ -239,6 +243,7 @@ void search_impl(raft::resources const& handle, ivfflat_interleaved_scan::value_t, IdxT, IvfSampleFilterT>( index, + params, queries, coarse_indices_dev.data(), n_queries, @@ -344,6 +349,7 @@ inline void search_with_filtering(raft::resources const& handle, search_impl(handle, index, + params, queries + offset_q * index.dim(), queries_batch, offset_q, @@ -377,6 +383,11 @@ void search_with_filtering(raft::resources const& handle, RAFT_EXPECTS(queries.extent(1) == index.dim(), "Number of query dimensions should equal number of dimensions in the index."); + if (params.metric_udf.has_value()) { + const_cast>&>(index).set_metric( + cuvs::distance::DistanceType::CustomUDF); + } + search_with_filtering(handle, params, index, diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp deleted file mode 100644 index c56aed94ac..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_udf.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
- * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include - -namespace cuvs::neighbors::ivf_flat::udf { - -void compile_metric(std::string const& code) -{ - NVRTCLTOFragmentCompiler compiler; - compiler.compile("IVF_FLAT_SEARCH_METRIC_UDF", code); -} - -} // namespace cuvs::neighbors::ivf_flat::udf diff --git a/cpp/src/neighbors/ivf_flat_index.cpp b/cpp/src/neighbors/ivf_flat_index.cpp index 77b24d4690..6ab162a117 100644 --- a/cpp/src/neighbors/ivf_flat_index.cpp +++ b/cpp/src/neighbors/ivf_flat_index.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -60,6 +60,12 @@ cuvs::distance::DistanceType index::metric() const noexcept return metric_; } +template +void index::set_metric(cuvs::distance::DistanceType metric) +{ + metric_ = metric; +} + template bool index::adaptive_centers() const noexcept { diff --git a/cpp/src/neighbors/refine/refine_device.cuh b/cpp/src/neighbors/refine/refine_device.cuh index b81c0b2b2a..403439cd56 100644 --- a/cpp/src/neighbors/refine/refine_device.cuh +++ b/cpp/src/neighbors/refine/refine_device.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -106,6 +106,7 @@ void refine_device( cuvs::neighbors::ivf_flat::detail::ivfflat_interleaved_scan( refinement_index, + cuvs::neighbors::ivf_flat::search_params(), queries.data_handle(), fake_coarse_idx.data(), static_cast(n_queries), diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 41894e92ce..45ae1dc88d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -131,6 +131,13 @@ ConfigureTest( PERCENT 100 ) +ConfigureTest( + NAME NEIGHBORS_ANN_IVF_FLAT_UDF_TEST + PATH neighbors/ann_ivf_flat/test_udf.cu + GPUS 1 + PERCENT 100 +) + ConfigureTest( NAME NEIGHBORS_ANN_IVF_PQ_TEST PATH neighbors/ann_ivf_pq/test_float_int64_t.cu neighbors/ann_ivf_pq/test_int8_t_int64_t.cu diff --git a/cpp/tests/neighbors/ann_ivf_flat/test_udf.cu b/cpp/tests/neighbors/ann_ivf_flat/test_udf.cu new file mode 100644 index 0000000000..ec13759ad2 --- /dev/null +++ b/cpp/tests/neighbors/ann_ivf_flat/test_udf.cu @@ -0,0 +1,325 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace cuvs::neighbors::ivf_flat { + +// ============================================================================ +// Define custom metrics using the UDF macro +// ============================================================================ + +// Custom L2 (squared Euclidean) metric - should match built-in L2 +CUVS_METRIC(custom_l2, { acc += squared_diff(x, y); }) + +// Custom inner product metric - should match built-in InnerProduct +// Note: Built-in uses negative inner product (larger similarity = smaller distance) +CUVS_METRIC(custom_inner_product, { acc -= dot_product(x, y); }) + +// Custom L1 (Manhattan) metric +CUVS_METRIC(custom_l1, { acc += abs_diff(x, y); }) + +// ============================================================================ +// Test fixture +// ============================================================================ + +class IvfFlatUdfTest : public ::testing::Test { + protected: + void SetUp() override + { + // Hardcoded 2D dataset for easy manual verification + // 8 database vectors in 4 dimensions + // + // Vectors arranged so we can easily verify distances: + // db[0] = [0, 0, 0, 0] - origin + // db[1] = [1, 0, 0, 0] - unit along x + // db[2] = [0, 1, 0, 0] - unit along y + // db[3] = [0, 0, 1, 0] - unit along z + // db[4] = [1, 1, 0, 0] - diagonal in xy + // db[5] = [2, 0, 0, 0] - 2 units along x + // db[6] = [1, 1, 1, 1] - all ones + // db[7] = [3, 4, 0, 0] - for 3-4-5 triangle verification + // + database_ = { + 0.0f, 0.0f, 0.0f, 0.0f, // db[0]: origin + 1.0f, 0.0f, 0.0f, 0.0f, // db[1]: L2 dist from origin = 1 + 0.0f, 1.0f, 0.0f, 0.0f, // db[2]: L2 dist from origin = 1 + 0.0f, 0.0f, 1.0f, 0.0f, // db[3]: L2 dist from origin = 1 + 1.0f, 1.0f, 0.0f, 0.0f, // db[4]: L2 dist from origin = sqrt(2) ≈ 1.414 + 2.0f, 0.0f, 0.0f, 0.0f, // db[5]: L2 dist from origin = 2 + 1.0f, 1.0f, 1.0f, 1.0f, // 
db[6]: L2 dist from origin = 2 + 3.0f, 4.0f, 0.0f, 0.0f, // db[7]: L2 dist from origin = 5 + }; + + // Query vectors + // query[0] = origin - nearest neighbors should be db[0], then db[1,2,3] (all dist=1) + // query[1] = [1,0,0,0] - nearest is db[1] (dist=0), then db[0,4] (dist=1) + queries_ = { + 0.0f, + 0.0f, + 0.0f, + 0.0f, // query[0]: origin + 1.0f, + 0.0f, + 0.0f, + 0.0f, // query[1]: same as db[1] + }; + + num_db_vecs_ = 8; + num_queries_ = 2; + dim_ = 4; + k_ = 4; + n_lists_ = 2; // Small number for this tiny dataset + n_probes_ = 2; // Search all clusters + } + + raft::resources handle_; + std::vector database_; + std::vector queries_; + int64_t num_db_vecs_; + int64_t num_queries_; + int64_t dim_; + int64_t k_; + uint32_t n_lists_; + uint32_t n_probes_; +}; + +// ============================================================================ +// Test: UDF L2 metric matches built-in L2 +// ============================================================================ + +TEST_F(IvfFlatUdfTest, CustomL2MatchesBuiltIn) +{ + auto stream = raft::resource::get_cuda_stream(handle_); + + // Copy data to device + rmm::device_uvector d_database(num_db_vecs_ * dim_, stream); + rmm::device_uvector d_queries(num_queries_ * dim_, stream); + raft::copy(d_database.data(), database_.data(), database_.size(), stream); + raft::copy(d_queries.data(), queries_.data(), queries_.size(), stream); + + auto database_view = + raft::make_device_matrix_view(d_database.data(), num_db_vecs_, dim_); + auto queries_view = + raft::make_device_matrix_view(d_queries.data(), num_queries_, dim_); + + // Build index with L2 metric + ivf_flat::index_params index_params; + index_params.n_lists = n_lists_; + index_params.metric = cuvs::distance::DistanceType::L2Expanded; + + auto idx = ivf_flat::build(handle_, index_params, database_view); + + // Allocate output buffers + rmm::device_uvector d_indices_builtin(num_queries_ * k_, stream); + rmm::device_uvector d_distances_builtin(num_queries_ * k_, 
stream); + rmm::device_uvector d_indices_udf(num_queries_ * k_, stream); + rmm::device_uvector d_distances_udf(num_queries_ * k_, stream); + + auto indices_builtin_view = + raft::make_device_matrix_view(d_indices_builtin.data(), num_queries_, k_); + auto distances_builtin_view = + raft::make_device_matrix_view(d_distances_builtin.data(), num_queries_, k_); + auto indices_udf_view = + raft::make_device_matrix_view(d_indices_udf.data(), num_queries_, k_); + auto distances_udf_view = + raft::make_device_matrix_view(d_distances_udf.data(), num_queries_, k_); + + // Search with built-in metric + ivf_flat::search_params search_params_builtin; + search_params_builtin.n_probes = n_probes_; + + ivf_flat::search(handle_, + search_params_builtin, + idx, + queries_view, + indices_builtin_view, + distances_builtin_view); + + // Search with custom UDF metric + ivf_flat::search_params search_params_udf; + search_params_udf.n_probes = n_probes_; + search_params_udf.metric_udf = custom_l2_udf(); + + ivf_flat::search( + handle_, search_params_udf, idx, queries_view, indices_udf_view, distances_udf_view); + + // Copy results to host + std::vector h_indices_builtin(num_queries_ * k_); + std::vector h_distances_builtin(num_queries_ * k_); + std::vector h_indices_udf(num_queries_ * k_); + std::vector h_distances_udf(num_queries_ * k_); + + raft::copy(h_indices_builtin.data(), d_indices_builtin.data(), num_queries_ * k_, stream); + raft::copy(h_distances_builtin.data(), d_distances_builtin.data(), num_queries_ * k_, stream); + raft::copy(h_indices_udf.data(), d_indices_udf.data(), num_queries_ * k_, stream); + raft::copy(h_distances_udf.data(), d_distances_udf.data(), num_queries_ * k_, stream); + raft::resource::sync_stream(handle_); + + // Verify UDF results match built-in results + for (int64_t i = 0; i < num_queries_ * k_; ++i) { + EXPECT_EQ(h_indices_udf[i], h_indices_builtin[i]) + << "Index mismatch at position " << i << ": UDF=" << h_indices_udf[i] + << ", builtin=" << 
h_indices_builtin[i]; + EXPECT_NEAR(h_distances_udf[i], h_distances_builtin[i], 1e-5f) + << "Distance mismatch at position " << i << ": UDF=" << h_distances_udf[i] + << ", builtin=" << h_distances_builtin[i]; + } + + // Additional verification: check expected distances for query[0] (origin) + // The nearest neighbor should be db[0] (origin) with distance 0 + EXPECT_EQ(h_indices_udf[0], 0) << "Nearest to origin should be db[0]"; + EXPECT_NEAR(h_distances_udf[0], 0.0f, 1e-5f) << "Distance from origin to origin should be 0"; +} + +// ============================================================================ +// Test: UDF produces correct L2 distances (manual verification) +// ============================================================================ + +TEST_F(IvfFlatUdfTest, CustomL2CorrectDistances) +{ + auto stream = raft::resource::get_cuda_stream(handle_); + + // Copy data to device + rmm::device_uvector d_database(num_db_vecs_ * dim_, stream); + rmm::device_uvector d_queries(num_queries_ * dim_, stream); + raft::copy(d_database.data(), database_.data(), database_.size(), stream); + raft::copy(d_queries.data(), queries_.data(), queries_.size(), stream); + + auto database_view = + raft::make_device_matrix_view(d_database.data(), num_db_vecs_, dim_); + auto queries_view = + raft::make_device_matrix_view(d_queries.data(), num_queries_, dim_); + + // Build index + ivf_flat::index_params index_params; + index_params.n_lists = n_lists_; + index_params.metric = cuvs::distance::DistanceType::L2Expanded; + + auto idx = ivf_flat::build(handle_, index_params, database_view); + + // Allocate output + rmm::device_uvector d_indices(num_queries_ * k_, stream); + rmm::device_uvector d_distances(num_queries_ * k_, stream); + + auto indices_view = + raft::make_device_matrix_view(d_indices.data(), num_queries_, k_); + auto distances_view = + raft::make_device_matrix_view(d_distances.data(), num_queries_, k_); + + // Search with UDF + ivf_flat::search_params search_params; + 
search_params.n_probes = n_probes_; + search_params.metric_udf = custom_l2_udf(); + + ivf_flat::search(handle_, search_params, idx, queries_view, indices_view, distances_view); + + // Copy to host + std::vector h_indices(num_queries_ * k_); + std::vector h_distances(num_queries_ * k_); + raft::copy(h_indices.data(), d_indices.data(), num_queries_ * k_, stream); + raft::copy(h_distances.data(), d_distances.data(), num_queries_ * k_, stream); + raft::resource::sync_stream(handle_); + + // Verify query[1] = [1,0,0,0] + // Expected: db[1] at distance 0 (exact match) + // db[0] at distance 1 (squared L2) + // db[4]=[1,1,0,0] at distance 1 (squared L2) + // db[2]=[0,1,0,0] at distance 2 (squared L2) + int64_t q1_offset = k_; // Results for query[1] start at index k_ + EXPECT_EQ(h_indices[q1_offset], 1) << "Query[1] nearest should be db[1] (exact match)"; + EXPECT_NEAR(h_distances[q1_offset], 0.0f, 1e-5f) << "Distance should be 0 for exact match"; +} + +// ============================================================================ +// Test: Inner product UDF +// ============================================================================ + +TEST_F(IvfFlatUdfTest, CustomInnerProductMatchesBuiltIn) +{ + auto stream = raft::resource::get_cuda_stream(handle_); + + // Copy data to device + rmm::device_uvector d_database(num_db_vecs_ * dim_, stream); + rmm::device_uvector d_queries(num_queries_ * dim_, stream); + raft::copy(d_database.data(), database_.data(), database_.size(), stream); + raft::copy(d_queries.data(), queries_.data(), queries_.size(), stream); + + auto database_view = + raft::make_device_matrix_view(d_database.data(), num_db_vecs_, dim_); + auto queries_view = + raft::make_device_matrix_view(d_queries.data(), num_queries_, dim_); + + // Build index with InnerProduct metric + ivf_flat::index_params index_params; + index_params.n_lists = n_lists_; + index_params.metric = cuvs::distance::DistanceType::InnerProduct; + + auto idx = ivf_flat::build(handle_, 
index_params, database_view); + + // Allocate output buffers + rmm::device_uvector d_indices_builtin(num_queries_ * k_, stream); + rmm::device_uvector d_distances_builtin(num_queries_ * k_, stream); + rmm::device_uvector d_indices_udf(num_queries_ * k_, stream); + rmm::device_uvector d_distances_udf(num_queries_ * k_, stream); + + auto indices_builtin_view = + raft::make_device_matrix_view(d_indices_builtin.data(), num_queries_, k_); + auto distances_builtin_view = + raft::make_device_matrix_view(d_distances_builtin.data(), num_queries_, k_); + auto indices_udf_view = + raft::make_device_matrix_view(d_indices_udf.data(), num_queries_, k_); + auto distances_udf_view = + raft::make_device_matrix_view(d_distances_udf.data(), num_queries_, k_); + + // Search with built-in metric + ivf_flat::search_params search_params_builtin; + search_params_builtin.n_probes = n_probes_; + + ivf_flat::search(handle_, + search_params_builtin, + idx, + queries_view, + indices_builtin_view, + distances_builtin_view); + + // Search with custom UDF metric + ivf_flat::search_params search_params_udf; + search_params_udf.n_probes = n_probes_; + search_params_udf.metric_udf = custom_inner_product_udf(); + + ivf_flat::search( + handle_, search_params_udf, idx, queries_view, indices_udf_view, distances_udf_view); + + // Copy results to host + std::vector h_indices_builtin(num_queries_ * k_); + std::vector h_distances_builtin(num_queries_ * k_); + std::vector h_indices_udf(num_queries_ * k_); + std::vector h_distances_udf(num_queries_ * k_); + + raft::copy(h_indices_builtin.data(), d_indices_builtin.data(), num_queries_ * k_, stream); + raft::copy(h_distances_builtin.data(), d_distances_builtin.data(), num_queries_ * k_, stream); + raft::copy(h_indices_udf.data(), d_indices_udf.data(), num_queries_ * k_, stream); + raft::copy(h_distances_udf.data(), d_distances_udf.data(), num_queries_ * k_, stream); + raft::resource::sync_stream(handle_); + + // Verify UDF results match built-in results + for 
(int64_t i = 0; i < num_queries_ * k_; ++i) { + EXPECT_EQ(h_indices_udf[i], h_indices_builtin[i]) << "Index mismatch at position " << i; + EXPECT_NEAR(h_distances_udf[i], h_distances_builtin[i], 1e-5f) + << "Distance mismatch at position " << i; + } +} + +} // namespace cuvs::neighbors::ivf_flat diff --git a/examples/cpp/src/udf_chebyshev_metric.cu b/examples/cpp/src/udf_chebyshev_metric.cu deleted file mode 100644 index 59205536d7..0000000000 --- a/examples/cpp/src/udf_chebyshev_metric.cu +++ /dev/null @@ -1,93 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -/** - * @file udf_chebyshev_metric.cu - * @brief Example: Chebyshev (L∞) distance metric - * - * Chebyshev distance = max absolute difference across dimensions: - * d(x, y) = max_i |x_i - y_i| - * - * This example shows how to use element access for custom reduction logic. - */ - -#include -#include -#include - -#include - -// ============================================================ -// Chebyshev (L∞) Distance -// ============================================================ -// -// Unlike L2 (sum of squares) or L1 (sum of abs), Chebyshev -// tracks the MAXIMUM absolute difference seen so far. -// -// Uses element access x[i], y[i] for custom reduction. - -CUVS_METRIC(chebyshev_distance, { - for (int i = 0; i < x.size(); ++i) { - auto xi = x[i]; - auto yi = y[i]; - auto diff = (xi > yi) ? 
(xi - yi) : (yi - xi); - if (diff > acc) { acc = static_cast(diff); } - } -}) - -// ============================================================ -// Weighted L1 Distance - using helper -// ============================================================ - -CUVS_METRIC(weighted_l1, { - acc += cuvs::udf::abs_diff(x, y) * AccT{2.5}; // Custom weight -}) - -// ============================================================ -// Squared L2 (Euclidean) Distance - using helper -// ============================================================ - -CUVS_METRIC(squared_l2, { acc += cuvs::udf::squared_diff(x, y); }) - -// ============================================================ -// Minkowski Distance (p=3) - using element access -// ============================================================ - -CUVS_METRIC(minkowski_p3, { - for (int i = 0; i < x.size(); ++i) { - auto xi = x[i]; - auto yi = y[i]; - auto diff = (xi > yi) ? (xi - yi) : (yi - xi); - acc += diff * diff * diff; // |x-y|³ - } -}) - -int main() -{ - std::cout << "=== cuVS UDF Distance Metrics ===\n\n"; - - std::cout << "Defined metrics:\n"; - std::cout << " 1. chebyshev_distance - L∞ norm (max absolute diff)\n"; - std::cout << " 2. weighted_l1 - Weighted L1 distance\n"; - std::cout << " 3. squared_l2 - Standard squared Euclidean\n"; - std::cout << " 4. minkowski_p3 - Minkowski with p=3\n\n"; - - std::cout << "Usage:\n"; - std::cout << " params.udf.metric = chebyshev_distance_udf();\n"; - std::cout << " params.udf.metric = weighted_l1_udf();\n"; - std::cout << " params.udf.metric = squared_l2_udf();\n"; - std::cout << " params.udf.metric = minkowski_p3_udf();\n\n"; - - std::cout << "Two approaches for custom metrics:\n"; - std::cout << " 1. Use helpers: acc += cuvs::udf::squared_diff(x, y);\n"; - std::cout << " 2. 
Use element access: for (int i = 0; i < x.size(); ++i) {...}\n\n"; - - std::cout << "Use cases for Chebyshev distance:\n"; - std::cout << " - Image similarity (max pixel deviation)\n"; - std::cout << " - Quality control (worst-case tolerance)\n"; - std::cout << " - Game AI (king's movement on chessboard)\n"; - - return 0; -} diff --git a/examples/cpp/src/udf_int8_metric.cu b/examples/cpp/src/udf_int8_metric.cu deleted file mode 100644 index e698989ac7..0000000000 --- a/examples/cpp/src/udf_int8_metric.cu +++ /dev/null @@ -1,117 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -/** - * @file udf_int8_metric.cu - * @brief Example: int8/uint8 metrics - now EASY with point wrapper! - * - * The point wrapper makes int8/uint8 metrics trivial. - * No more manual intrinsics or if constexpr branches! - */ - -#include -#include -#include - -#include - -// ============================================================ -// Universal L2 Distance - ONE LINE! -// ============================================================ -// -// The helper squared_diff() handles ALL types optimally: -// - float/half: simple scalar math -// - int8/uint8: SIMD intrinsics (__vabsdiffs4, __dp4a) -// -// You don't need to know about packed types or intrinsics! - -CUVS_METRIC(universal_l2, { - acc += cuvs::udf::squared_diff(x, y); // Just works for everything! -}) - -// ============================================================ -// Universal L1 Distance - ONE LINE! -// ============================================================ - -CUVS_METRIC(universal_l1, { - acc += cuvs::udf::abs_diff(x, y); // Just works for everything! -}) - -// ============================================================ -// Universal Dot Product - ONE LINE! -// ============================================================ - -CUVS_METRIC(universal_dot, { - acc += cuvs::udf::dot_product(x, y); // Just works for everything! 
-}) - -// ============================================================ -// Custom logic using element access -// ============================================================ -// -// For custom logic, use x[i] and y[i] to access individual elements. -// The point wrapper handles unpacking automatically. - -CUVS_METRIC(custom_weighted_l2, { - // Access individual elements - works for all types - for (int i = 0; i < x.size(); ++i) { - auto diff = x[i] - y[i]; - auto weight = AccT{1} + AccT{i}; // Custom per-dimension weight - acc += weight * diff * diff; - } -}) - -// ============================================================ -// Power user: raw access with intrinsics -// ============================================================ -// -// For maximum performance, you can still use raw() and intrinsics. -// But now you don't HAVE to! - -CUVS_METRIC(power_user_l2, { - if constexpr (decltype(x)::is_packed()) { - // SIMD path - use intrinsics directly - auto diff = __vabsdiffs4(x.raw(), y.raw()); - acc = __dp4a(diff, diff, acc); - } else { - // Scalar path - auto diff = x.raw() - y.raw(); - acc += diff * diff; - } -}) - -int main() -{ - std::cout << "=== cuVS UDF int8/uint8 Metrics - Now Easy! ===\n\n"; - - std::cout << "OLD WAY (manual intrinsics):\n"; - std::cout << " if constexpr (std::is_same_v && Veclen > 1) {\n"; - std::cout << " auto diff = __vabsdiffs4(x, y); // Must know this!\n"; - std::cout << " acc = raft::dp4a(diff, diff, acc); // And this!\n"; - std::cout << " } else { ... 
}\n\n"; - - std::cout << "NEW WAY (with point wrapper):\n"; - std::cout << " acc += cuvs::udf::squared_diff(x, y); // Just works!\n\n"; - - std::cout << "Available helpers (auto-deduce Veclen):\n"; - std::cout << " squared_diff(x, y) - (x-y)² optimized for all types\n"; - std::cout << " abs_diff(x, y) - |x-y| optimized for all types\n"; - std::cout << " dot_product(x, y) - x·y optimized for all types\n"; - std::cout << " product(x, y) - element-wise product\n"; - std::cout << " sum(x, y) - element-wise sum\n"; - std::cout << " max_elem(x, y) - maximum element\n\n"; - - std::cout << "For custom logic, use element access:\n"; - std::cout << " for (int i = 0; i < x.size(); ++i) {\n"; - std::cout << " acc += custom_weight[i] * (x[i] - y[i]);\n"; - std::cout << " }\n\n"; - - std::cout << "Type info available at compile time:\n"; - std::cout << " x.size() - 4 for packed int8/uint8, 1 for float\n"; - std::cout << " x.is_packed() - true for int8/uint8 with Veclen > 1\n"; - std::cout << " x.raw() - raw storage for power users\n"; - - return 0; -} diff --git a/examples/cpp/src/udf_simple_metric.cu b/examples/cpp/src/udf_simple_metric.cu deleted file mode 100644 index 58aba84e53..0000000000 --- a/examples/cpp/src/udf_simple_metric.cu +++ /dev/null @@ -1,98 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -/** - * @file udf_simple_metric.cu - * @brief Simple example: Custom "Over 9000" L1 distance metric - * - * Shows the minimal code needed to define and use a custom metric. - */ - -#include -#include -#include - -#include - -// ============================================================ -// Define your metric - ONE LINE (plus the body)! -// ============================================================ -// -// The CUVS_METRIC macro: -// 1. Creates the actual struct (compiled, validated with `override`) -// 2. 
Generates awesome_over_9000_udf() function returning source string -// -// Available variables in body: -// acc - accumulated distance (AccT&, modify in-place) -// x, y - vector elements (point) -// -// x and y provide: -// x.raw() - raw packed storage (power users) -// x[i] - element access (unpacked) -// x.size() - number of elements -// -// Helpers (Veclen deduced automatically!): -// cuvs::udf::squared_diff(x, y) - optimal for all types -// cuvs::udf::abs_diff(x, y) - optimal for all types -// cuvs::udf::dot_product(x, y) - optimal for all types - -CUVS_METRIC(awesome_over_9000, { - // IT'S OVER 9000!!!! - // Works for ALL types - float, half, int8, uint8! - auto diff = cuvs::udf::abs_diff(x, y); - acc += diff * AccT{9001}; -}) - -// That's it! The macro handles: -// - Struct definition with proper inheritance -// - operator() signature with `override` for validation -// - Source string generation for JIT -// - point wrapping for clean API - -int main() -{ - std::cout << "=== cuVS UDF Simple Example ===\n\n"; - - // ============================================================ - // Use in search - // ============================================================ - - // raft::device_resources res; - // auto index = cuvs::neighbors::ivf_flat::deserialize(res, "index.bin"); - - // cuvs::neighbors::ivf_flat::search_params params; - // params.n_probes = 50; - - // Use the auto-generated _udf() function! 
- // params.udf.metric = awesome_over_9000_udf(); - - // cuvs::neighbors::ivf_flat::search(res, params, index, queries, neighbors, distances); - - // ============================================================ - // What happens under the hood - // ============================================================ - - std::cout << "User writes:\n"; - std::cout << " CUVS_METRIC(awesome_over_9000, {\n"; - std::cout << " auto diff = cuvs::udf::abs_diff(x, y);\n"; - std::cout << " acc += diff * AccT{9001};\n"; - std::cout << " })\n\n"; - - std::cout << "x and y are point which provides:\n"; - std::cout << " - x.raw() : packed storage for intrinsics\n"; - std::cout << " - x[i] : unpacked element access\n"; - std::cout << " - x.size() : number of elements (4 for packed int8, 1 for float)\n"; - std::cout << " - x.is_packed(): whether data is packed\n\n"; - - std::cout << "Helper functions deduce Veclen automatically:\n"; - std::cout << " cuvs::udf::squared_diff(x, y) // No template args!\n"; - std::cout << " cuvs::udf::abs_diff(x, y)\n"; - std::cout << " cuvs::udf::dot_product(x, y)\n\n"; - - std::cout << "At runtime, cuVS wraps raw values in point\n"; - std::cout << "and calls your metric with the wrapped arguments.\n"; - - return 0; -} diff --git a/examples/cpp/src/udf_weighted_metric.cu b/examples/cpp/src/udf_weighted_metric.cu deleted file mode 100644 index ea1e13c89b..0000000000 --- a/examples/cpp/src/udf_weighted_metric.cu +++ /dev/null @@ -1,133 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -/** - * @file udf_weighted_metric.cu - * @brief Example: Custom metric with helper headers - * - * This example shows how to use custom headers with your UDF metric. - * Headers are passed to NVRTC's virtual filesystem. 
- */ - -#include -#include -#include - -#include - -int main() -{ - raft::device_resources res; - - // ============================================================ - // Define helper header - // ============================================================ - // - // If your metric needs helper functions or constants, - // you can provide them as headers. - - std::string math_utils_header = R"( - #pragma once - - namespace my_utils { - - template - __device__ __forceinline__ T safe_abs(T x) { - return (x < T{0}) ? -x : x; - } - - template - __device__ __forceinline__ T clamp(T x, T lo, T hi) { - return (x < lo) ? lo : ((x > hi) ? hi : x); - } - - // Custom weight function - could be learned from data! - template - __device__ __forceinline__ T importance_weight() { - return T{2.5}; - } - - } // namespace my_utils - )"; - - // ============================================================ - // Define metric using the helper header and point wrapper - // ============================================================ - - cuvs::udf::metric_source weighted_metric = { - .source = R"( - #include "math_utils.cuh" - #include - - template - struct weighted_euclidean - : cuvs::udf::metric_interface - { - using point_type = cuvs::udf::point; - - __device__ void operator()(AccT& acc, point_type x, point_type y) override { - // Use helper for optimal squared diff - auto sq_diff = cuvs::udf::squared_diff(x, y); - - // Apply custom weight - auto weight = my_utils::importance_weight(); - acc += weight * sq_diff; - } - }; - )", - .struct_name = "weighted_euclidean", - - // Provide the header content - .headers = {{"math_utils.cuh", math_utils_header}}}; - - // ============================================================ - // Alternative: Per-dimension weights using element access - // ============================================================ - - cuvs::udf::metric_source per_dim_weighted = { - .source = R"( - #include - - template - struct per_dim_weighted_l2 - : 
cuvs::udf::metric_interface - { - using point_type = cuvs::udf::point; - - __device__ void operator()(AccT& acc, point_type x, point_type y) override { - // Per-dimension weights using element access - for (int i = 0; i < x.size(); ++i) { - auto diff = x[i] - y[i]; - auto weight = AccT{1} + AccT{i} * AccT{0.1}; // Increasing weights - acc += weight * diff * diff; - } - } - }; - )", - .struct_name = "per_dim_weighted_l2", - .headers = {}}; - - // ============================================================ - // Search configuration - // ============================================================ - - cuvs::neighbors::ivf_flat::search_params params; - params.n_probes = 50; - // params.udf.metric = weighted_metric; - - std::cout << "Weighted Euclidean distance metric example!\n"; - std::cout << "\n"; - std::cout << "This demonstrates:\n"; - std::cout << " 1. Using custom helper headers with UDFs\n"; - std::cout << " 2. Using cuvs::udf::squared_diff(x, y) helper\n"; - std::cout << " 3. Per-dimension weights using x[i], y[i] element access\n"; - std::cout << "\n"; - std::cout << "The point wrapper provides:\n"; - std::cout << " - squared_diff(x, y) : optimal for all types\n"; - std::cout << " - x[i], y[i] : element access for custom logic\n"; - std::cout << " - x.raw() : raw storage for power users\n"; - - return 0; -} From 8b2775cd99310dbde04f6c7e0708dd78db9fdb1a Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 3 Feb 2026 21:02:10 +0000 Subject: [PATCH 076/158] run benchmarks --- cpp/bench/ann/CMakeLists.txt | 14 +- cpp/include/cuvs/neighbors/ivf_flat.hpp | 2 +- .../jit_lto/NVRTCLTOFragmentCompiler.cu | 5 +- .../neighbors/ivf_flat/ivf_flat_search.cuh | 8 + cpp/tests/CMakeLists.txt | 14 + .../ann_ivf_flat/ivf_flat_udf_bench.cu | 470 ++++++++++++++++ .../ann_ivf_flat/plot_udf_benchmark.py | 211 ++++++++ cpp/tests/neighbors/ann_ivf_flat/test_udf.cu | 502 ++++++++++-------- .../neighbors/ann_ivf_flat/udf_results.png | Bin 0 -> 217724 bytes 9 files changed, 1002 
insertions(+), 224 deletions(-) create mode 100644 cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu create mode 100644 cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py create mode 100644 cpp/tests/neighbors/ann_ivf_flat/udf_results.png diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 8d254c0933..7a8177e4b8 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -240,6 +240,18 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) ConfigureAnnBench( NAME CUVS_IVF_FLAT PATH src/cuvs/cuvs_benchmark.cu src/cuvs/cuvs_ivf_flat.cu LINKS cuvs ) + + # UDF benchmark - standalone executable + add_executable(CUVS_IVF_FLAT_UDF_BENCH src/cuvs/ivf_flat_udf_bench.cu) + target_link_libraries(CUVS_IVF_FLAT_UDF_BENCH PRIVATE cuvs) + set_target_properties( + CUVS_IVF_FLAT_UDF_BENCH + PROPERTIES CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + ) + add_dependencies(CUVS_ANN_BENCH_ALL CUVS_IVF_FLAT_UDF_BENCH) endif() if(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE) diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index 56162c0d67..9e9b722b75 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -3445,7 +3445,7 @@ struct )" #NAME R"( : metric_interface { \ namespace cuvs { namespace neighbors { namespace ivf_flat { namespace detail { \ template \ -__device__ void compute_dist(AccT& acc, AccT x, AccT y) \ +__device__ __forceinline__ void compute_dist(AccT& acc, AccT x, AccT y) \ { \ ::)" #NAME R"( metric; \ metric(acc, 
::point(x), ::point(y)); \ diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index f0af0369a3..06330f50ad 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -33,8 +33,9 @@ NVRTCLTOFragmentCompiler::NVRTCLTOFragmentCompiler() this->standard_compile_opts.resize(4); std::size_t i = 0; - this->standard_compile_opts[i++] = - std::string{"-arch=sm_" + std::to_string((major * 10 + minor))}; + // this->standard_compile_opts[i++] = + // std::string{"-arch=sm_" + std::to_string((major * 10 + minor))}; + this->standard_compile_opts[i++] = std::string{"-arch=sm_75"}; this->standard_compile_opts[i++] = std::string{"-dlto"}; this->standard_compile_opts[i++] = std::string{"-rdc=true"}; this->standard_compile_opts[i++] = std::string{"-default-device"}; diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh index 7d84ece485..916e757270 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -383,6 +383,8 @@ void search_with_filtering(raft::resources const& handle, RAFT_EXPECTS(queries.extent(1) == index.dim(), "Number of query dimensions should equal number of dimensions in the index."); + // Save original metric and temporarily set to CustomUDF if using UDF + auto original_metric = index.metric(); if (params.metric_udf.has_value()) { const_cast>&>(index).set_metric( cuvs::distance::DistanceType::CustomUDF); @@ -397,6 +399,12 @@ void search_with_filtering(raft::resources const& handle, neighbors.data_handle(), distances.data_handle(), sample_filter); + + // Restore original metric + if (params.metric_udf.has_value()) { + const_cast>&>(index).set_metric( + original_metric); + } } template diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 45ae1dc88d..baafe790b4 100644 --- a/cpp/tests/CMakeLists.txt +++ 
b/cpp/tests/CMakeLists.txt @@ -138,6 +138,20 @@ ConfigureTest( PERCENT 100 ) +# UDF benchmark executable (not a gtest, outputs CSV) +add_executable(NEIGHBORS_ANN_IVF_FLAT_UDF_BENCH neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu) +target_link_libraries( + NEIGHBORS_ANN_IVF_FLAT_UDF_BENCH PRIVATE cuvs $ +) +set_target_properties( + NEIGHBORS_ANN_IVF_FLAT_UDF_BENCH + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bench" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON +) + ConfigureTest( NAME NEIGHBORS_ANN_IVF_PQ_TEST PATH neighbors/ann_ivf_pq/test_float_int64_t.cu neighbors/ann_ivf_pq/test_int8_t_int64_t.cu diff --git a/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu b/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu new file mode 100644 index 0000000000..0deb101cf1 --- /dev/null +++ b/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu @@ -0,0 +1,470 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Benchmark comparing built-in L2 vs Custom UDF L2 for IVF-Flat search + * Outputs results as CSV for plotting with Python + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// Define custom L2 metric using the CUVS_METRIC macro +CUVS_METRIC(custom_l2, { acc += squared_diff(x, y); }) + +// Raw UDF that directly implements compute_dist matching built-in exactly +inline std::string raw_l2_udf() +{ + return R"( +/* Fixed-width integer types for nvrtc */ +using int8_t = signed char; +using uint8_t = unsigned char; +using int32_t = int; +using uint32_t = unsigned int; + +/* std::is_same_v for nvrtc */ +namespace std { +template struct is_same { static constexpr bool value = false; }; +template struct is_same { static constexpr bool value = true; }; +template inline constexpr bool is_same_v = is_same::value; +} + +namespace cuvs { namespace neighbors { namespace ivf_flat { namespace detail { + +template +__device__ __forceinline__ void compute_dist(AccT& acc, AccT x, AccT y) +{ + if constexpr (std::is_same_v && Veclen > 1) { + // int8 with SIMD - use intrinsics like the built-in + const auto diff = __vabsdiffs4(x, y); + acc = __dp4a(diff, diff, static_cast(acc)); + } else { + // float or scalar int - simple formula + const auto diff = x - y; + acc += diff * diff; + } +} + +}}}} +)"; +} + +namespace { + +using namespace cuvs::neighbors; + +// ============================================================================ +// Clear NVIDIA compute cache for accurate JIT timing +// ============================================================================ + +void clear_compute_cache() +{ + const char* home = std::getenv("HOME"); + if (home) { + std::filesystem::path cache_path = std::filesystem::path(home) / ".nv" / "ComputeCache"; + std::error_code ec; + std::filesystem::remove_all(cache_path, ec); + // Ignore errors - cache may 
not exist + } +} + +// ============================================================================ +// Timing utilities +// ============================================================================ + +class Timer { + public: + void start() { start_ = std::chrono::high_resolution_clock::now(); } + + double stop_ms() + { + auto end = std::chrono::high_resolution_clock::now(); + return std::chrono::duration(end - start_).count(); + } + + private: + std::chrono::high_resolution_clock::time_point start_; +}; + +double median(std::vector& times) +{ + std::sort(times.begin(), times.end()); + size_t n = times.size(); + if (n % 2 == 0) { return (times[n / 2 - 1] + times[n / 2]) / 2.0; } + return times[n / 2]; +} + +// ============================================================================ +// Data generation +// ============================================================================ + +template +void generate_random_data(std::vector& data, size_t n, std::mt19937& rng) +{ + if constexpr (std::is_same_v) { + std::uniform_real_distribution dist(-1.0f, 1.0f); + for (size_t i = 0; i < n; ++i) { + data[i] = dist(rng); + } + } else if constexpr (std::is_same_v) { + std::uniform_int_distribution dist(-127, 127); + for (size_t i = 0; i < n; ++i) { + data[i] = static_cast(dist(rng)); + } + } +} + +// ============================================================================ +// Benchmark result structure +// ============================================================================ + +struct SearchResult { + double first_ms; + double median_ms; +}; + +struct BenchmarkResult { + std::string dtype; + int64_t k; + double first_builtin_ms; + double first_udf_ms; + double first_raw_ms; + double median_builtin_ms; + double median_udf_ms; + double median_raw_ms; +}; + +// ============================================================================ +// Benchmark runners (separate functions for built-in and UDF) +// 
============================================================================ + +template +SearchResult run_builtin_benchmark( + raft::resources& handle, int64_t num_vectors, int64_t dim, int64_t k, int num_iterations) +{ + // Clear NVIDIA compute cache for accurate JIT timing + clear_compute_cache(); + + auto stream = raft::resource::get_cuda_stream(handle); + Timer timer; + + // Generate random data + std::mt19937 rng(42); + std::vector h_database(num_vectors * dim); + std::vector h_queries(100 * dim); + generate_random_data(h_database, h_database.size(), rng); + generate_random_data(h_queries, h_queries.size(), rng); + + int64_t num_queries = 100; + + // Copy to device + rmm::device_uvector d_database(num_vectors * dim, stream); + rmm::device_uvector d_queries(num_queries * dim, stream); + raft::copy(d_database.data(), h_database.data(), h_database.size(), stream); + raft::copy(d_queries.data(), h_queries.data(), h_queries.size(), stream); + + auto database_view = + raft::make_device_matrix_view(d_database.data(), num_vectors, dim); + auto queries_view = + raft::make_device_matrix_view(d_queries.data(), num_queries, dim); + + // Build index + ivf_flat::index_params index_params; + index_params.n_lists = 1024; + index_params.metric = cuvs::distance::DistanceType::L2Expanded; + + auto idx = ivf_flat::build(handle, index_params, database_view); + raft::resource::sync_stream(handle); + + // Allocate output buffers + rmm::device_uvector d_indices(num_queries * k, stream); + rmm::device_uvector d_distances(num_queries * k, stream); + + auto indices_view = + raft::make_device_matrix_view(d_indices.data(), num_queries, k); + auto distances_view = + raft::make_device_matrix_view(d_distances.data(), num_queries, k); + + // Search params + ivf_flat::search_params search_params; + search_params.n_probes = 32; + + SearchResult result; + + // First search (includes JIT compilation) + timer.start(); + ivf_flat::search(handle, search_params, idx, queries_view, indices_view, 
distances_view); + raft::resource::sync_stream(handle); + result.first_ms = timer.stop_ms(); + + // Repeated searches (JIT already cached) + std::vector times; + for (int i = 0; i < num_iterations; ++i) { + timer.start(); + ivf_flat::search(handle, search_params, idx, queries_view, indices_view, distances_view); + raft::resource::sync_stream(handle); + times.push_back(timer.stop_ms()); + } + + result.median_ms = median(times); + return result; +} + +template +SearchResult run_udf_benchmark( + raft::resources& handle, int64_t num_vectors, int64_t dim, int64_t k, int num_iterations) +{ + // Clear NVIDIA compute cache for accurate JIT timing + clear_compute_cache(); + + auto stream = raft::resource::get_cuda_stream(handle); + Timer timer; + + // Generate random data (same seed as built-in for consistency) + std::mt19937 rng(42); + std::vector h_database(num_vectors * dim); + std::vector h_queries(100 * dim); + generate_random_data(h_database, h_database.size(), rng); + generate_random_data(h_queries, h_queries.size(), rng); + + int64_t num_queries = 100; + + // Copy to device + rmm::device_uvector d_database(num_vectors * dim, stream); + rmm::device_uvector d_queries(num_queries * dim, stream); + raft::copy(d_database.data(), h_database.data(), h_database.size(), stream); + raft::copy(d_queries.data(), h_queries.data(), h_queries.size(), stream); + + auto database_view = + raft::make_device_matrix_view(d_database.data(), num_vectors, dim); + auto queries_view = + raft::make_device_matrix_view(d_queries.data(), num_queries, dim); + + // Build index with L2Expanded (kmeans doesn't support CustomUDF) + // The UDF is only used during search + ivf_flat::index_params index_params; + index_params.n_lists = 1024; + index_params.metric = cuvs::distance::DistanceType::L2Expanded; + + auto idx = ivf_flat::build(handle, index_params, database_view); + raft::resource::sync_stream(handle); + + // Allocate output buffers + rmm::device_uvector d_indices(num_queries * k, stream); + 
rmm::device_uvector d_distances(num_queries * k, stream); + + auto indices_view = + raft::make_device_matrix_view(d_indices.data(), num_queries, k); + auto distances_view = + raft::make_device_matrix_view(d_distances.data(), num_queries, k); + + // Search params with UDF + ivf_flat::search_params search_params; + search_params.n_probes = 32; + search_params.metric_udf = custom_l2_udf(); + + SearchResult result; + + // First search (includes JIT compilation) + timer.start(); + ivf_flat::search(handle, search_params, idx, queries_view, indices_view, distances_view); + raft::resource::sync_stream(handle); + result.first_ms = timer.stop_ms(); + + // Repeated searches (JIT already cached) + std::vector times; + for (int i = 0; i < num_iterations; ++i) { + timer.start(); + ivf_flat::search(handle, search_params, idx, queries_view, indices_view, distances_view); + raft::resource::sync_stream(handle); + times.push_back(timer.stop_ms()); + } + + result.median_ms = median(times); + return result; +} + +template +SearchResult run_raw_udf_benchmark( + raft::resources& handle, int64_t num_vectors, int64_t dim, int64_t k, int num_iterations) +{ + // Clear NVIDIA compute cache for accurate JIT timing + clear_compute_cache(); + + auto stream = raft::resource::get_cuda_stream(handle); + Timer timer; + + // Generate random data (same seed as built-in for consistency) + std::mt19937 rng(42); + std::vector h_database(num_vectors * dim); + std::vector h_queries(100 * dim); + generate_random_data(h_database, h_database.size(), rng); + generate_random_data(h_queries, h_queries.size(), rng); + + int64_t num_queries = 100; + + // Copy to device + rmm::device_uvector d_database(num_vectors * dim, stream); + rmm::device_uvector d_queries(num_queries * dim, stream); + raft::copy(d_database.data(), h_database.data(), h_database.size(), stream); + raft::copy(d_queries.data(), h_queries.data(), h_queries.size(), stream); + + auto database_view = + raft::make_device_matrix_view(d_database.data(), 
num_vectors, dim); + auto queries_view = + raft::make_device_matrix_view(d_queries.data(), num_queries, dim); + + // Build index with L2Expanded (kmeans doesn't support CustomUDF) + ivf_flat::index_params index_params; + index_params.n_lists = 1024; + index_params.metric = cuvs::distance::DistanceType::L2Expanded; + + auto idx = ivf_flat::build(handle, index_params, database_view); + raft::resource::sync_stream(handle); + + // Allocate output buffers + rmm::device_uvector d_indices(num_queries * k, stream); + rmm::device_uvector d_distances(num_queries * k, stream); + + auto indices_view = + raft::make_device_matrix_view(d_indices.data(), num_queries, k); + auto distances_view = + raft::make_device_matrix_view(d_distances.data(), num_queries, k); + + // Search params with raw UDF (no point/metric_interface overhead) + ivf_flat::search_params search_params; + search_params.n_probes = 32; + search_params.metric_udf = raw_l2_udf(); + + SearchResult result; + + // First search (includes JIT compilation) + timer.start(); + ivf_flat::search(handle, search_params, idx, queries_view, indices_view, distances_view); + raft::resource::sync_stream(handle); + result.first_ms = timer.stop_ms(); + + // Repeated searches (JIT already cached) + std::vector times; + for (int i = 0; i < num_iterations; ++i) { + timer.start(); + ivf_flat::search(handle, search_params, idx, queries_view, indices_view, distances_view); + raft::resource::sync_stream(handle); + times.push_back(timer.stop_ms()); + } + + result.median_ms = median(times); + return result; +} + +template +BenchmarkResult run_benchmark(raft::resources& handle, + const char* dtype_name, + int64_t num_vectors, + int64_t dim, + int64_t k, + int num_iterations = 20) +{ + BenchmarkResult result; + result.dtype = dtype_name; + result.k = k; + + // Run built-in benchmark (with fresh cache) + auto builtin = run_builtin_benchmark(handle, num_vectors, dim, k, num_iterations); + result.first_builtin_ms = builtin.first_ms; + 
result.median_builtin_ms = builtin.median_ms; + + // Run UDF benchmark (with fresh cache) + auto udf = run_udf_benchmark(handle, num_vectors, dim, k, num_iterations); + result.first_udf_ms = udf.first_ms; + result.median_udf_ms = udf.median_ms; + + // Run raw UDF benchmark (with fresh cache) + auto raw = run_raw_udf_benchmark(handle, num_vectors, dim, k, num_iterations); + result.first_raw_ms = raw.first_ms; + result.median_raw_ms = raw.median_ms; + + return result; +} + +} // namespace + +int main(int argc, char** argv) +{ + std::string output_file = "udf_benchmark_results.csv"; + if (argc > 1) { output_file = argv[1]; } + + raft::resources handle; + + const int64_t num_vectors = 1000000; + const int64_t dim = 512; + const std::vector k_values = {4, 16, 64, 256}; + const int num_iterations = 20; + + std::vector results; + + std::cerr << "IVF-Flat UDF Benchmark\n"; + std::cerr << "Dataset: " << num_vectors << " vectors, " << dim << " dimensions\n"; + std::cerr << "Queries: 100, n_probes: 32, n_lists: 1024\n"; + std::cerr << "Iterations for median: " << num_iterations << "\n\n"; + + // Float32 benchmarks + std::cerr << "Running float32 benchmarks...\n"; + for (int64_t k : k_values) { + std::cerr << " k=" << k << "... "; + auto result = run_benchmark(handle, "float32", num_vectors, dim, k, num_iterations); + results.push_back(result); + std::cerr << "done\n"; + } + + // Int8 benchmarks + std::cerr << "Running int8 benchmarks...\n"; + for (int64_t k : k_values) { + std::cerr << " k=" << k << "... 
"; + auto result = run_benchmark(handle, "int8", num_vectors, dim, k, num_iterations); + results.push_back(result); + std::cerr << "done\n"; + } + + // Write CSV + std::ofstream csv(output_file); + csv << "dtype,k,median_builtin_ms,median_udf_ms,median_raw_ms,udf_ratio,raw_ratio\n"; + + for (const auto& r : results) { + double udf_ratio = r.median_udf_ms / r.median_builtin_ms; + double raw_ratio = r.median_raw_ms / r.median_builtin_ms; + + csv << r.dtype << "," << r.k << "," << std::fixed << std::setprecision(3) << r.median_builtin_ms + << "," << r.median_udf_ms << "," << r.median_raw_ms << "," << std::setprecision(4) + << udf_ratio << "," << raw_ratio << "\n"; + } + + csv.close(); + std::cerr << "\nResults written to: " << output_file << "\n"; + + // Also print to stdout for convenience + std::cout << "dtype,k,median_builtin_ms,median_udf_ms,median_raw_ms,udf_ratio,raw_ratio\n"; + for (const auto& r : results) { + double udf_ratio = r.median_udf_ms / r.median_builtin_ms; + double raw_ratio = r.median_raw_ms / r.median_builtin_ms; + + std::cout << r.dtype << "," << r.k << "," << std::fixed << std::setprecision(3) + << r.median_builtin_ms << "," << r.median_udf_ms << "," << r.median_raw_ms << "," + << std::setprecision(4) << udf_ratio << "," << raw_ratio << "\n"; + } + + return 0; +} diff --git a/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py b/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py new file mode 100644 index 0000000000..5ee8778bff --- /dev/null +++ b/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Plot UDF benchmark results from CSV output. 
+ +Usage: + python plot_udf_benchmark.py udf_benchmark_results.csv +""" + +import sys +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def plot_benchmark_results(csv_file: str): + # Read data + df = pd.read_csv(csv_file) + + # Create figure with 2x2 subplots + fig, axes = plt.subplots(2, 2, figsize=(14, 10)) + fig.suptitle( + "IVF-Flat UDF Benchmark: Built-in L2 vs Custom UDF L2\n(1M vectors, 512 dims, 100 queries)", + fontsize=14, + fontweight="bold", + ) + + colors = {"float32": "#2ecc71", "int8": "#3498db"} + + # ========================================================================= + # Plot 1: First search time (JIT compilation cost) + # ========================================================================= + ax1 = axes[0, 0] + + for dtype in ["float32", "int8"]: + data = df[df["dtype"] == dtype] + x = np.arange(len(data)) + width = 0.35 + offset = -width / 2 if dtype == "float32" else width / 2 + + ax1.bar( + x + offset, + data["first_builtin_ms"], + width, + label=f"{dtype} Built-in", + color=colors[dtype], + alpha=0.7, + ) + ax1.bar( + x + offset, + data["first_udf_ms"] - data["first_builtin_ms"], + width, + bottom=data["first_builtin_ms"], + label=f"{dtype} UDF overhead", + color=colors[dtype], + alpha=0.4, + hatch="//", + ) + + ax1.set_xlabel("k (neighbors)") + ax1.set_ylabel("Time (ms)") + ax1.set_title("First Search Time (includes JIT compilation)") + ax1.set_xticks(np.arange(len(df[df["dtype"] == "float32"]))) + ax1.set_xticklabels(df[df["dtype"] == "float32"]["k"]) + ax1.legend(loc="upper left") + ax1.grid(axis="y", alpha=0.3) + + # ========================================================================= + # Plot 2: JIT overhead + # ========================================================================= + ax2 = axes[0, 1] + + for dtype in ["float32", "int8"]: + data = df[df["dtype"] == dtype] + ax2.plot( + data["k"], + data["jit_overhead_ms"], + "o-", + label=dtype, + color=colors[dtype], + linewidth=2, + 
markersize=8, + ) + + ax2.set_xlabel("k (neighbors)") + ax2.set_ylabel("JIT Overhead (ms)") + ax2.set_title("UDF JIT Compilation Overhead\n(First UDF - First Built-in)") + ax2.legend() + ax2.grid(alpha=0.3) + ax2.set_xscale("log", base=2) + + # ========================================================================= + # Plot 3: Median search time (cached) + # ========================================================================= + ax3 = axes[1, 0] + + width = 0.35 + for i, dtype in enumerate(["float32", "int8"]): + data = df[df["dtype"] == dtype] + x = np.arange(len(data)) + offset = (i - 0.5) * width + + ax3.bar( + x + offset - width / 4, + data["median_builtin_ms"], + width / 2, + label=f"{dtype} Built-in", + color=colors[dtype], + alpha=0.8, + ) + ax3.bar( + x + offset + width / 4, + data["median_udf_ms"], + width / 2, + label=f"{dtype} UDF", + color=colors[dtype], + alpha=0.4, + hatch="//", + ) + + ax3.set_xlabel("k (neighbors)") + ax3.set_ylabel("Time (ms)") + ax3.set_title("Median Search Time (JIT cached, 20 iterations)") + ax3.set_xticks(np.arange(len(df[df["dtype"] == "float32"]))) + ax3.set_xticklabels(df[df["dtype"] == "float32"]["k"]) + ax3.legend(loc="upper left") + ax3.grid(axis="y", alpha=0.3) + + # ========================================================================= + # Plot 4: UDF/Built-in ratio + # ========================================================================= + ax4 = axes[1, 1] + + for dtype in ["float32", "int8"]: + data = df[df["dtype"] == dtype] + ax4.plot( + data["k"], + data["udf_builtin_ratio"], + "o-", + label=dtype, + color=colors[dtype], + linewidth=2, + markersize=8, + ) + + ax4.axhline( + y=1.0, + color="red", + linestyle="--", + alpha=0.5, + label="1.0x (no overhead)", + ) + ax4.set_xlabel("k (neighbors)") + ax4.set_ylabel("UDF / Built-in Ratio") + ax4.set_title("UDF Performance Ratio\n(closer to 1.0 = better)") + ax4.legend() + ax4.grid(alpha=0.3) + ax4.set_xscale("log", base=2) + ax4.set_ylim(0.9, 
max(df["udf_builtin_ratio"].max() * 1.1, 1.2)) + + plt.tight_layout() + + # Save figure + output_file = csv_file.replace(".csv", ".png") + plt.savefig(output_file, dpi=150, bbox_inches="tight") + print(f"Plot saved to: {output_file}") + + # Also show + plt.show() + + +def print_summary(csv_file: str): + """Print a summary table of results.""" + df = pd.read_csv(csv_file) + + print("\n" + "=" * 80) + print("UDF Benchmark Summary") + print("=" * 80) + print( + f"\n{'dtype':<10} {'k':<6} {'First Builtin':<15} {'First UDF':<15} {'JIT Overhead':<15} {'Median Builtin':<15} {'Median UDF':<15} {'Ratio':<10}" + ) + print("-" * 100) + + for _, row in df.iterrows(): + print( + f"{row['dtype']:<10} {row['k']:<6} {row['first_builtin_ms']:<15.2f} {row['first_udf_ms']:<15.2f} {row['jit_overhead_ms']:<15.2f} {row['median_builtin_ms']:<15.2f} {row['median_udf_ms']:<15.2f} {row['udf_builtin_ratio']:<10.3f}" + ) + + print("\n" + "=" * 80) + print("Key Observations:") + print(f" - Average JIT overhead: {df['jit_overhead_ms'].mean():.2f} ms") + print( + f" - Average UDF/Built-in ratio: {df['udf_builtin_ratio'].mean():.3f}x" + ) + print(f" - Max UDF/Built-in ratio: {df['udf_builtin_ratio'].max():.3f}x") + print("=" * 80 + "\n") + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python plot_udf_benchmark.py ") + print( + "Example: python plot_udf_benchmark.py udf_benchmark_results.csv" + ) + sys.exit(1) + + csv_file = sys.argv[1] + print_summary(csv_file) + plot_benchmark_results(csv_file) diff --git a/cpp/tests/neighbors/ann_ivf_flat/test_udf.cu b/cpp/tests/neighbors/ann_ivf_flat/test_udf.cu index ec13759ad2..ef6486ed4e 100644 --- a/cpp/tests/neighbors/ann_ivf_flat/test_udf.cu +++ b/cpp/tests/neighbors/ann_ivf_flat/test_udf.cu @@ -23,25 +23,22 @@ namespace cuvs::neighbors::ivf_flat { // Custom L2 (squared Euclidean) metric - should match built-in L2 CUVS_METRIC(custom_l2, { acc += squared_diff(x, y); }) -// Custom inner product metric - should match built-in 
InnerProduct -// Note: Built-in uses negative inner product (larger similarity = smaller distance) -CUVS_METRIC(custom_inner_product, { acc -= dot_product(x, y); }) - -// Custom L1 (Manhattan) metric -CUVS_METRIC(custom_l1, { acc += abs_diff(x, y); }) - // ============================================================================ -// Test fixture +// Test data traits for different types // ============================================================================ -class IvfFlatUdfTest : public ::testing::Test { - protected: - void SetUp() override +template +struct TestDataTraits; + +template <> +struct TestDataTraits { + static constexpr int64_t dim = 4; + static constexpr int64_t num_db_vecs = 8; + + static std::vector database() { - // Hardcoded 2D dataset for easy manual verification - // 8 database vectors in 4 dimensions - // - // Vectors arranged so we can easily verify distances: + // 4-dimensional float dataset + // Vectors arranged for easy distance verification: // db[0] = [0, 0, 0, 0] - origin // db[1] = [1, 0, 0, 0] - unit along x // db[2] = [0, 1, 0, 0] - unit along y @@ -49,23 +46,24 @@ class IvfFlatUdfTest : public ::testing::Test { // db[4] = [1, 1, 0, 0] - diagonal in xy // db[5] = [2, 0, 0, 0] - 2 units along x // db[6] = [1, 1, 1, 1] - all ones - // db[7] = [3, 4, 0, 0] - for 3-4-5 triangle verification - // - database_ = { + // db[7] = [3, 4, 0, 0] - for 3-4-5 triangle + return { 0.0f, 0.0f, 0.0f, 0.0f, // db[0]: origin 1.0f, 0.0f, 0.0f, 0.0f, // db[1]: L2 dist from origin = 1 0.0f, 1.0f, 0.0f, 0.0f, // db[2]: L2 dist from origin = 1 0.0f, 0.0f, 1.0f, 0.0f, // db[3]: L2 dist from origin = 1 - 1.0f, 1.0f, 0.0f, 0.0f, // db[4]: L2 dist from origin = sqrt(2) ≈ 1.414 - 2.0f, 0.0f, 0.0f, 0.0f, // db[5]: L2 dist from origin = 2 - 1.0f, 1.0f, 1.0f, 1.0f, // db[6]: L2 dist from origin = 2 - 3.0f, 4.0f, 0.0f, 0.0f, // db[7]: L2 dist from origin = 5 + 1.0f, 1.0f, 0.0f, 0.0f, // db[4]: L2 dist from origin = 2 + 2.0f, 0.0f, 0.0f, 0.0f, // db[5]: L2 
dist from origin = 4 + 1.0f, 1.0f, 1.0f, 1.0f, // db[6]: L2 dist from origin = 4 + 3.0f, 4.0f, 0.0f, 0.0f, // db[7]: L2 dist from origin = 25 }; + } - // Query vectors - // query[0] = origin - nearest neighbors should be db[0], then db[1,2,3] (all dist=1) - // query[1] = [1,0,0,0] - nearest is db[1] (dist=0), then db[0,4] (dist=1) - queries_ = { + static std::vector queries() + { + // query[0] = origin - nearest is db[0] (dist=0) + // query[1] = [1,0,0,0] - nearest is db[1] (dist=0) + return { 0.0f, 0.0f, 0.0f, @@ -75,18 +73,208 @@ class IvfFlatUdfTest : public ::testing::Test { 0.0f, 0.0f, // query[1]: same as db[1] }; + } + + // Expected: query[0] nearest is db[0] with distance 0 + static int64_t expected_nearest_idx_q0() { return 0; } + static float expected_nearest_dist_q0() { return 0.0f; } + + // Expected: query[1] nearest is db[1] with distance 0 + static int64_t expected_nearest_idx_q1() { return 1; } + static float expected_nearest_dist_q1() { return 0.0f; } +}; + +template <> +struct TestDataTraits { + static constexpr int64_t dim = 16; + static constexpr int64_t num_db_vecs = 8; + + static std::vector database() + { + // 16-dimensional int8 dataset to test vectorized SIMD intrinsics + return { + // db[0]: all zeros + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + // db[1]: unit in first dim + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + // db[2]: unit in second dim + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + // db[3]: all ones - L2 dist from zeros = 16 + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + // db[4]: first 12 dims are 2 - L2 dist from zeros = 48 + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + // db[5]: all twos - L2 dist from zeros = 64 + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + // db[6]: alternating 1,0 - L2 dist from zeros = 8 + 1, + 
0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + // db[7]: alternating 0,1 - L2 dist from zeros = 8 + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + }; + } + + static std::vector queries() + { + // query[0] = all zeros - nearest is db[0] (dist=0) + // query[1] = all ones - nearest is db[3] (dist=0) + return { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // query[0] + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // query[1] + }; + } - num_db_vecs_ = 8; + // Expected: query[0] nearest is db[0] with distance 0 + static int64_t expected_nearest_idx_q0() { return 0; } + static float expected_nearest_dist_q0() { return 0.0f; } + + // Expected: query[1] nearest is db[3] with distance 0 + static int64_t expected_nearest_idx_q1() { return 3; } + static float expected_nearest_dist_q1() { return 0.0f; } +}; + +// ============================================================================ +// Templated test fixture +// ============================================================================ + +template +class IvfFlatUdfTest : public ::testing::Test { + protected: + using Traits = TestDataTraits; + + void SetUp() override + { + database_ = Traits::database(); + queries_ = Traits::queries(); + num_db_vecs_ = Traits::num_db_vecs; num_queries_ = 2; - dim_ = 4; + dim_ = Traits::dim; k_ = 4; - n_lists_ = 2; // Small number for this tiny dataset - n_probes_ = 2; // Search all clusters + n_lists_ = 2; + n_probes_ = 2; } raft::resources handle_; - std::vector database_; - std::vector queries_; + std::vector database_; + std::vector queries_; int64_t num_db_vecs_; int64_t num_queries_; int64_t dim_; @@ -95,52 +283,58 @@ class IvfFlatUdfTest : public ::testing::Test { uint32_t n_probes_; }; +using TestTypes = ::testing::Types; +TYPED_TEST_SUITE(IvfFlatUdfTest, TestTypes); + // ============================================================================ -// Test: UDF L2 metric matches built-in L2 +// Test: UDF L2 
metric matches built-in L2 and produces correct distances // ============================================================================ -TEST_F(IvfFlatUdfTest, CustomL2MatchesBuiltIn) +TYPED_TEST(IvfFlatUdfTest, CustomL2MatchesBuiltIn) { - auto stream = raft::resource::get_cuda_stream(handle_); + using T = TypeParam; + using Traits = TestDataTraits; + + auto stream = raft::resource::get_cuda_stream(this->handle_); // Copy data to device - rmm::device_uvector d_database(num_db_vecs_ * dim_, stream); - rmm::device_uvector d_queries(num_queries_ * dim_, stream); - raft::copy(d_database.data(), database_.data(), database_.size(), stream); - raft::copy(d_queries.data(), queries_.data(), queries_.size(), stream); + rmm::device_uvector d_database(this->num_db_vecs_ * this->dim_, stream); + rmm::device_uvector d_queries(this->num_queries_ * this->dim_, stream); + raft::copy(d_database.data(), this->database_.data(), this->database_.size(), stream); + raft::copy(d_queries.data(), this->queries_.data(), this->queries_.size(), stream); - auto database_view = - raft::make_device_matrix_view(d_database.data(), num_db_vecs_, dim_); - auto queries_view = - raft::make_device_matrix_view(d_queries.data(), num_queries_, dim_); + auto database_view = raft::make_device_matrix_view( + d_database.data(), this->num_db_vecs_, this->dim_); + auto queries_view = raft::make_device_matrix_view( + d_queries.data(), this->num_queries_, this->dim_); // Build index with L2 metric ivf_flat::index_params index_params; - index_params.n_lists = n_lists_; + index_params.n_lists = this->n_lists_; index_params.metric = cuvs::distance::DistanceType::L2Expanded; - auto idx = ivf_flat::build(handle_, index_params, database_view); + auto idx = ivf_flat::build(this->handle_, index_params, database_view); // Allocate output buffers - rmm::device_uvector d_indices_builtin(num_queries_ * k_, stream); - rmm::device_uvector d_distances_builtin(num_queries_ * k_, stream); - rmm::device_uvector 
d_indices_udf(num_queries_ * k_, stream); - rmm::device_uvector d_distances_udf(num_queries_ * k_, stream); - - auto indices_builtin_view = - raft::make_device_matrix_view(d_indices_builtin.data(), num_queries_, k_); - auto distances_builtin_view = - raft::make_device_matrix_view(d_distances_builtin.data(), num_queries_, k_); - auto indices_udf_view = - raft::make_device_matrix_view(d_indices_udf.data(), num_queries_, k_); - auto distances_udf_view = - raft::make_device_matrix_view(d_distances_udf.data(), num_queries_, k_); + rmm::device_uvector d_indices_builtin(this->num_queries_ * this->k_, stream); + rmm::device_uvector d_distances_builtin(this->num_queries_ * this->k_, stream); + rmm::device_uvector d_indices_udf(this->num_queries_ * this->k_, stream); + rmm::device_uvector d_distances_udf(this->num_queries_ * this->k_, stream); + + auto indices_builtin_view = raft::make_device_matrix_view( + d_indices_builtin.data(), this->num_queries_, this->k_); + auto distances_builtin_view = raft::make_device_matrix_view( + d_distances_builtin.data(), this->num_queries_, this->k_); + auto indices_udf_view = raft::make_device_matrix_view( + d_indices_udf.data(), this->num_queries_, this->k_); + auto distances_udf_view = raft::make_device_matrix_view( + d_distances_udf.data(), this->num_queries_, this->k_); // Search with built-in metric ivf_flat::search_params search_params_builtin; - search_params_builtin.n_probes = n_probes_; + search_params_builtin.n_probes = this->n_probes_; - ivf_flat::search(handle_, + ivf_flat::search(this->handle_, search_params_builtin, idx, queries_view, @@ -149,177 +343,45 @@ TEST_F(IvfFlatUdfTest, CustomL2MatchesBuiltIn) // Search with custom UDF metric ivf_flat::search_params search_params_udf; - search_params_udf.n_probes = n_probes_; + search_params_udf.n_probes = this->n_probes_; search_params_udf.metric_udf = custom_l2_udf(); ivf_flat::search( - handle_, search_params_udf, idx, queries_view, indices_udf_view, distances_udf_view); - - // 
Copy results to host - std::vector h_indices_builtin(num_queries_ * k_); - std::vector h_distances_builtin(num_queries_ * k_); - std::vector h_indices_udf(num_queries_ * k_); - std::vector h_distances_udf(num_queries_ * k_); - - raft::copy(h_indices_builtin.data(), d_indices_builtin.data(), num_queries_ * k_, stream); - raft::copy(h_distances_builtin.data(), d_distances_builtin.data(), num_queries_ * k_, stream); - raft::copy(h_indices_udf.data(), d_indices_udf.data(), num_queries_ * k_, stream); - raft::copy(h_distances_udf.data(), d_distances_udf.data(), num_queries_ * k_, stream); - raft::resource::sync_stream(handle_); - - // Verify UDF results match built-in results - for (int64_t i = 0; i < num_queries_ * k_; ++i) { - EXPECT_EQ(h_indices_udf[i], h_indices_builtin[i]) - << "Index mismatch at position " << i << ": UDF=" << h_indices_udf[i] - << ", builtin=" << h_indices_builtin[i]; - EXPECT_NEAR(h_distances_udf[i], h_distances_builtin[i], 1e-5f) - << "Distance mismatch at position " << i << ": UDF=" << h_distances_udf[i] - << ", builtin=" << h_distances_builtin[i]; - } - - // Additional verification: check expected distances for query[0] (origin) - // The nearest neighbor should be db[0] (origin) with distance 0 - EXPECT_EQ(h_indices_udf[0], 0) << "Nearest to origin should be db[0]"; - EXPECT_NEAR(h_distances_udf[0], 0.0f, 1e-5f) << "Distance from origin to origin should be 0"; -} - -// ============================================================================ -// Test: UDF produces correct L2 distances (manual verification) -// ============================================================================ - -TEST_F(IvfFlatUdfTest, CustomL2CorrectDistances) -{ - auto stream = raft::resource::get_cuda_stream(handle_); - - // Copy data to device - rmm::device_uvector d_database(num_db_vecs_ * dim_, stream); - rmm::device_uvector d_queries(num_queries_ * dim_, stream); - raft::copy(d_database.data(), database_.data(), database_.size(), stream); - 
raft::copy(d_queries.data(), queries_.data(), queries_.size(), stream); - - auto database_view = - raft::make_device_matrix_view(d_database.data(), num_db_vecs_, dim_); - auto queries_view = - raft::make_device_matrix_view(d_queries.data(), num_queries_, dim_); - - // Build index - ivf_flat::index_params index_params; - index_params.n_lists = n_lists_; - index_params.metric = cuvs::distance::DistanceType::L2Expanded; - - auto idx = ivf_flat::build(handle_, index_params, database_view); - - // Allocate output - rmm::device_uvector d_indices(num_queries_ * k_, stream); - rmm::device_uvector d_distances(num_queries_ * k_, stream); - - auto indices_view = - raft::make_device_matrix_view(d_indices.data(), num_queries_, k_); - auto distances_view = - raft::make_device_matrix_view(d_distances.data(), num_queries_, k_); - - // Search with UDF - ivf_flat::search_params search_params; - search_params.n_probes = n_probes_; - search_params.metric_udf = custom_l2_udf(); - - ivf_flat::search(handle_, search_params, idx, queries_view, indices_view, distances_view); - - // Copy to host - std::vector h_indices(num_queries_ * k_); - std::vector h_distances(num_queries_ * k_); - raft::copy(h_indices.data(), d_indices.data(), num_queries_ * k_, stream); - raft::copy(h_distances.data(), d_distances.data(), num_queries_ * k_, stream); - raft::resource::sync_stream(handle_); - - // Verify query[1] = [1,0,0,0] - // Expected: db[1] at distance 0 (exact match) - // db[0] at distance 1 (squared L2) - // db[4]=[1,1,0,0] at distance 1 (squared L2) - // db[2]=[0,1,0,0] at distance 2 (squared L2) - int64_t q1_offset = k_; // Results for query[1] start at index k_ - EXPECT_EQ(h_indices[q1_offset], 1) << "Query[1] nearest should be db[1] (exact match)"; - EXPECT_NEAR(h_distances[q1_offset], 0.0f, 1e-5f) << "Distance should be 0 for exact match"; -} - -// ============================================================================ -// Test: Inner product UDF -// 
============================================================================ - -TEST_F(IvfFlatUdfTest, CustomInnerProductMatchesBuiltIn) -{ - auto stream = raft::resource::get_cuda_stream(handle_); - - // Copy data to device - rmm::device_uvector d_database(num_db_vecs_ * dim_, stream); - rmm::device_uvector d_queries(num_queries_ * dim_, stream); - raft::copy(d_database.data(), database_.data(), database_.size(), stream); - raft::copy(d_queries.data(), queries_.data(), queries_.size(), stream); - - auto database_view = - raft::make_device_matrix_view(d_database.data(), num_db_vecs_, dim_); - auto queries_view = - raft::make_device_matrix_view(d_queries.data(), num_queries_, dim_); - - // Build index with InnerProduct metric - ivf_flat::index_params index_params; - index_params.n_lists = n_lists_; - index_params.metric = cuvs::distance::DistanceType::InnerProduct; - - auto idx = ivf_flat::build(handle_, index_params, database_view); - - // Allocate output buffers - rmm::device_uvector d_indices_builtin(num_queries_ * k_, stream); - rmm::device_uvector d_distances_builtin(num_queries_ * k_, stream); - rmm::device_uvector d_indices_udf(num_queries_ * k_, stream); - rmm::device_uvector d_distances_udf(num_queries_ * k_, stream); - - auto indices_builtin_view = - raft::make_device_matrix_view(d_indices_builtin.data(), num_queries_, k_); - auto distances_builtin_view = - raft::make_device_matrix_view(d_distances_builtin.data(), num_queries_, k_); - auto indices_udf_view = - raft::make_device_matrix_view(d_indices_udf.data(), num_queries_, k_); - auto distances_udf_view = - raft::make_device_matrix_view(d_distances_udf.data(), num_queries_, k_); - - // Search with built-in metric - ivf_flat::search_params search_params_builtin; - search_params_builtin.n_probes = n_probes_; - - ivf_flat::search(handle_, - search_params_builtin, - idx, - queries_view, - indices_builtin_view, - distances_builtin_view); - - // Search with custom UDF metric - ivf_flat::search_params 
search_params_udf; - search_params_udf.n_probes = n_probes_; - search_params_udf.metric_udf = custom_inner_product_udf(); - - ivf_flat::search( - handle_, search_params_udf, idx, queries_view, indices_udf_view, distances_udf_view); + this->handle_, search_params_udf, idx, queries_view, indices_udf_view, distances_udf_view); // Copy results to host - std::vector h_indices_builtin(num_queries_ * k_); - std::vector h_distances_builtin(num_queries_ * k_); - std::vector h_indices_udf(num_queries_ * k_); - std::vector h_distances_udf(num_queries_ * k_); - - raft::copy(h_indices_builtin.data(), d_indices_builtin.data(), num_queries_ * k_, stream); - raft::copy(h_distances_builtin.data(), d_distances_builtin.data(), num_queries_ * k_, stream); - raft::copy(h_indices_udf.data(), d_indices_udf.data(), num_queries_ * k_, stream); - raft::copy(h_distances_udf.data(), d_distances_udf.data(), num_queries_ * k_, stream); - raft::resource::sync_stream(handle_); + std::vector h_indices_builtin(this->num_queries_ * this->k_); + std::vector h_distances_builtin(this->num_queries_ * this->k_); + std::vector h_indices_udf(this->num_queries_ * this->k_); + std::vector h_distances_udf(this->num_queries_ * this->k_); + + raft::copy( + h_indices_builtin.data(), d_indices_builtin.data(), this->num_queries_ * this->k_, stream); + raft::copy( + h_distances_builtin.data(), d_distances_builtin.data(), this->num_queries_ * this->k_, stream); + raft::copy(h_indices_udf.data(), d_indices_udf.data(), this->num_queries_ * this->k_, stream); + raft::copy(h_distances_udf.data(), d_distances_udf.data(), this->num_queries_ * this->k_, stream); + raft::resource::sync_stream(this->handle_); // Verify UDF results match built-in results - for (int64_t i = 0; i < num_queries_ * k_; ++i) { + for (int64_t i = 0; i < this->num_queries_ * this->k_; ++i) { EXPECT_EQ(h_indices_udf[i], h_indices_builtin[i]) << "Index mismatch at position " << i; EXPECT_NEAR(h_distances_udf[i], h_distances_builtin[i], 1e-5f) << 
"Distance mismatch at position " << i; } + + // Verify expected distances for query[0] + EXPECT_EQ(h_indices_udf[0], Traits::expected_nearest_idx_q0()) + << "Query[0] nearest neighbor index mismatch"; + EXPECT_NEAR(h_distances_udf[0], Traits::expected_nearest_dist_q0(), 1e-5f) + << "Query[0] nearest neighbor distance mismatch"; + + // Verify expected distances for query[1] + int64_t q1_offset = this->k_; + EXPECT_EQ(h_indices_udf[q1_offset], Traits::expected_nearest_idx_q1()) + << "Query[1] nearest neighbor index mismatch"; + EXPECT_NEAR(h_distances_udf[q1_offset], Traits::expected_nearest_dist_q1(), 1e-5f) + << "Query[1] nearest neighbor distance mismatch"; } } // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/tests/neighbors/ann_ivf_flat/udf_results.png b/cpp/tests/neighbors/ann_ivf_flat/udf_results.png new file mode 100644 index 0000000000000000000000000000000000000000..79696d7a9f06d293918cd158548c240f42bd2543 GIT binary patch literal 217724 zcmdqJcRbeb`#yeGR8&f4vMD23p{!(U$<8P<8Ieu0%2rZ}(ojYsWMzbq5v5*svCRiyPWj&v2wSi9Jcawb#n1^aCojoS2yN|Nexii@TjzwVBr&yb6u0mWc<2!m^qCy~6HZiymbKg|c5w z+2G8b@ee-6j7NJ`Px*c3xE~prqBdTCVZ+whUTN>@ocx~|y3MQyrMy$O>*hW0wGp!v zv&`+>M^CSQK}+oSpX!p|1+T{op4Ft326YtOaR2oBrk%Il$ydIg*rN=DA!z7wqAng{C~egq*M$I&40g^+I_b} zTHOErV!W4ZR961?i_tr-MhD& zZY-;Q#Ne@lq9S!u`i&bmlI|}5BR$>je8Y~Rp&`D=h7TW(pZfOix-gym`=E`h^0jHA z)Pno_EqytT9z9w;t(_?Mu;P-^;xE;_%A6cg6;;(?uR3d;k>n`4xR$hoBaS_32XDOj zUR=c-7#Nt+bpGDGdo5nAvc)Ir@}bPD7O%94lQU=H?fww=@0F>C936#oj+F-EbEF*2HP0}4ynTFp+!DW|s;bJk z)0FY_>C*`czLJygOH=0(=QNI01#*1<^pEPR>pOKH9NNMtq(?PBKc81npb|#Q^tR~K zQ4xzu_Vee@chkjBuTWa|DW+iJQ~eDw3)k^ZiPv`(w{pmN?t6ZL$;#H2Iv^k*;nuAb zwW!VZAIhYecbeXxQoKk`!nF30o)u(m1I82-6kNyJcZL;Q6F7pSw6eIkxGSg1xkpCX z1fO7FbTlKMl_o{$^lEDAfVL=x&!0bQX=`h^yGvTz*wAm@94zlM+23CslAsi{SJ*H! z^yM`Ho2mZlZaV3X%?u2&2?>!$?(H4ut?=ut@Z0@BJMlu%sg4wjs=#i#%NaTc4jfR? 
z)7zYqk}~z{n+?0dnN9D~k2Hu?Ed=9bPRT!2m}AZz?w%Qb#S$l|t74nd(QujT+qWUj zW98l(UfmGWytg+XFYwP3itVdTVRzO{}busu6Tn z(;r`wCD6XBVDc>XQ_Q`4T(@MLGj)tI4TPGtJ%rCJg*63#+2b?G-d%Jm;^W5?&BG5L zK8(D4S<~KLQ2XBAOTOPf4UF6is?Rog?4C$bCULk_yBx_W{_Q0$7Kd(1st>Hmi;H6% z?=D(rQy;BcI(d;r%=OQ_mvX60MCJpRhM|Vb5+`14V&Ch}+S}WkS6tlgt;I&q!EtHv z&+pndZ|EMG7PWZqQL_jO4W+`%Q8P0$v&uLbpLcq!evw&x*Dig*Ed6xt^G@<&!baJV z&8eC@;^KFj869l?nA_Ug%6?hyvB!w^Q~$ZmEMn%N$;noG)s23oJ$PWb~v1 zoW{0>YCnu-3 z$*ODE_qgs$_U|cn?%K}p)K__2=cKJ|7{cU4`3EeRT~Gcs3l2dhVS{}*xQ6Zd`S~wz zIP0(W@bFmAzGu_EdX5|`E2}5I-?yivrc!%dW|s~A^v~^~>ET!6($a>0TTSFY)_rWy z;=Eb1m5GVk!ongnER5!hudgqX;ONo@yu_zZpD2=YCq*qPqY*+hU;eqhdn2}W=M{OC z-#@=z#tM%7_Lh2d`a@ZUSxHWD?OR$L{nkfjEVhkt$|@==@uhY-_4aNT>#hRZ1nE=P znNvjA!#8r#xwyDA+Zq`e*}qL%!OqTp<<6aGpRc{ueSP=z>0&hkUfrYq-dIz!5+Um9 z>8W<+%$}U%<>5HdG+(e?w2FSSY9XsP6mHsvP--p-`J14rpS^phf8~nn_fLlv{ASk= zH^%Fi-8EKG$u+M))W=Q8^c0<1BPN#1-jGS-HP-$dD_B(E+-T|Syz|V4mX?-0m%hD+ z66Kh9cz9@EP4-q?R1R5f)pS?U(M8oP*R6dbaBi9!ALz_f|9O?UA7dS{ad9*EDphZ2 zo$P(;OZ$%b{{8z&Se!ip^MxuEZ{O-XJ@akwcE!*=_lD=ssTsB(dUkHbDoZOX9trk{G@`g8KQ)B7j%d>YaFu%0@V{=OtJu3o(w z2Sokb;PZjW$ry)s4@e$KaX#bgtIQiKSi#WQ)n!t7Yzh|A-Z;_-z z5eScsjWu(@@dTF%y5W0o-x`j?%nbsugNG=I7$`qCv*jO69zpOlqZ zcjj5`@%qBp^6p)Hahjvj;NW21iIth-J|CtcIIJ@ey7zGIuPg6}j{s^(=QEe(of@6nV0 z+zOL%?q1s-FgLxe(tqyikn@{+N)}U3ZGBk0=cb3L$BvbG?oZZ6&@qddM_=D*R`c>D z&B>D|-*$9FBqnkQ8)Tf@e)v{@16NS!<;xp`Lqa^*OAdK@7LK|(IXYfg$0BN37fBy{ zf{OCCxmg*9&TabR{$_36{IToCcQSL_tVuRO8cp?{=+1lmI0R4t%cq;7Mq`j=v>{Q> zt7g3GvD;uBO<0G%_ubvj-3D(zjdkR<#jsG2XTqWw1X2(Zc_k$g=g+SoAYyzx8htQxeX*+^N7-B15W^9IdL`#GDY9qHqwMTF+gHpu`84goBBxiV z??-gaObo7F#)j1UjvU$eXZ}a+1*RR0jU_I9qDupcJMx?9$zq-T@nzWTj>R=j#j}yy zohVy;7V8@t_(Vk3vxpvh2COvj`EwMGRhD#FUCEpFc4ky|HJo(n#3rPIKPM+wzH4g> zd-UkhXhC72)WY;DG2^Fa@{?3oQdCq_HUSA>Yps!iI?KFvm=-%doGpIW(jtEP{hsXO zteM( z;==feD-Tj4W4He9ob1UD%8klI`&_&uRb} z#`da!-!dc~0@NJp3w6uDD91;K=nm;E$P&c$8$n*T?rdQsi<;lhBKA6~4=}>a|g9`qh`t;A% zuxUUrYrwzGQV#)w7)*;C9fWF@B&4OEKXDr>c+`=izV42KubvJk7uUvJyB@PUdjKaT 
z>~^`UGX3VRqFv2|a5|NcFMicD)e{OX2gs>UV8b}lT_;Ls5Dn5LQ<;QmUi z+>*KY~7X^4rcXW6>-BCs_FaDG`fdmrc?6I`@6#%;JJ9-XjXXl7vH6+Vzjh z=c;KFo%*&m=^i=MZWLwF#XmnS!rb%6R+Hvrc)jLw<08y$t(3GHsc^D?)j7bf@!kqE zoymcLSgzzh0>6I!vRsCesc$pPE6VKG{Km4x-no+<7U?dwcdxnB`E=Bj6rbb=X=%xR zKPSmr+Dob!eeJ8#ye;Fb*0kR?aBycTBxNRe2QAt)K!lM~~%Kjno zL#APtf&ky0J#H(p=?v^gAKNw6<6O`hN~!?x%eeH42re!D{^}9;LRpr9p8h<}9qmJ4n6vL2r@DlQGw zMr1whS;8hJh+D0k|M^u6k12xlgP1Uy>z$LHzrJ^!XhO{U)K)c~;yw&&(H}o3H5lwFr5j(sgKV^{&!@ssSQniK%v;JKlp!9mxh#ekwo{#TRIqA0EPm$~kxvp()Q?dd6n zd_v7J=z9mJouRenb$0q#Jr40l&ZRFZ3 z*oq3Y^}2}3%B@?sHamB9b|zUrzd+Wds7N|GI(idcU-L5O0H7?b*F8nQa3Djg`I*m- z6AdUjD=4U@t{xt%C<sw}2(^86opd*jMX~ojy-D0rbr)MXy4gqY@DD{G17Ez=-0QQ(bk98WNKbZ z(Boa=gK#?lCk735VCbi)k0IBYb6iYjxj|?sVBzxA@#`NI9gJ09b>&z?8hLOkp zKGGCicD&qMJzm&w6C2wF;8r1#UAuR)1Z<2wJjWA%pt-`&2b*N1ldf@zEyFO287%}U zAl1b|bHgmH5B1le-+kfDr4+cZmV<+%(6Nmcr{;O7#|YIsu@jv187LUP5QlE)#1sM- zjFZhRExBX3ltlNeuJX1Rv#Va%gq}l5NlDq)Jja%VLEQO&Vf>3_7@z0!)tsadE8!LS z<$sVOXf!o7i%N#Se%%-p6cq1uc4jnIjD`k1!E-dPhV5=%UjOK;)O@X4ia}XPHF&6( z^6K5YcM>Rs$|J2v^dEbiJ8}{iG&S$ z=uvQtcKtKYsz0;0lROqsG$UhTj1nGL;uHsm@>5uMpJoT35*cls{`s>3h`Q~9keHj0 z&8brtanfF0PDdTT;GObibhICZ)xM{A=ecOAa|O1qc(-j+Hy4nV-9k=R_E(?XOa_@q3Z|a4xVP(VPshoN;6&6R~59;$H#|i^=jeP+Y&b6 zrZhC0qp0wD`v7K!C&=!47DnF-oTs_SymRFhi48hvJHGW-uL_f^tgP%irc%=}Zz8UQ z-P3S&>g?$mvA?y6uzH&tgtn4)oK~l=GJyI!S3_Ohi;+*%l9RvQ$oU9y?;WlUGQm5L z#zakBeE|J{aiza3!PPhzmah{eu+?`)6Mae(8RpI6O7#UEkB*K8Z&0fxbQG5H!kdAB zUsG26{QQD(UY}dk#W^nAy~{~*)bZoT!=~||8PflR0BQ&Rp4jc1u5=|<=Sq>{LxT%| z2w{@>4|(Lg$0LDeqk65(%DtTzW_w(^>f<_|#NEUqs6=RtwLhYsSg{ZJ_2bL66diob zIW4N2wwyOr9^f2g7(HUs)6;VusG-ibsNy8`^Zx#8;wN9Tl4A@KgE1SexMER61sb{C z4-Vbr9gPMMe{pAT02^SQ?jF&ZnVCGm7N=|UPoF;Jjz7S}dGl-gPOZyaO5~NK+#~ip zoPBph>E$(gN|JHD_2|3w%Sq6?Vsmiv6I2Tm6hyeIE-`tDV_A2Tyl9m61A#`~%) zlAVyqgS~bg>7bm54!>e@_G}qcftbW>L&1!v_`ZOP{^LtF^)^HH{p)T70w_Tt*>FQ}!r=^e-vU&-L?3II2nFI_!7 zHbBhNRU0;JXnth2TUjyxXL|035}XEs`+7nET@gWxbDGJ=EG)R>y~hRPM#Kow&=JnJ zOc*n9gOx0F=`(i~=qh&ZdHdMz2vGgZ$%&~#u!T27k54OBj)xyF%FoyRRc&;8<*HQ@ 
z0rQ>vY+(BgVASg1mS8b&`D#Z_?PXX?p8hpO)SgAYp_U+rJZLL)u0H!P^ ztmYUtZ+1ls+nQ@(VOxPDMtgM)B1`AK7Jaou!W`W~N3O-X@N26judbHjDRFyv)l1N3 z)v8s-W@bYG2eiXCuhE~Vo77=mF4@?uJcoDN0jAX-JvkRW@IEyqg>mw_(e@I)EhiE~ zKA?95nll`m{7~}=(A~b}J`HIS=DYnbo__!2JTTwG>}$SMs}FD5+E|HQ36}I~Qg${$ z0Y^71lHd_AGuoW6%TD`l50{*026vYuwv%)k6mEdVDc?+_Wc#jyG!+x1O2!9M`%$_s zu}Sw24hEw}C4qVqw5U`-(;WpOmv*zJhf3FPJMA1Z>NFg6T8=z~te|P3AYNuA3k%DW zCr?Nr&p4XPc_%SZx@=r~i{hZ1c zeN_lxyvoh8uSYD(U4A@HD!Y2^8nW4W6hoo7u`ge&Q8n#*pO#+d1vke#Uo5^X2xOe| zJ$sfmaG#RzH2+?dJaqca1O0wKzNCq#g~;Lbts}(JqiY}XveFL3>+RsFuC3*>_&QzT zF+xWe&lybv!@BU#|G1nzE4$RWGyZ^d`*lf^k;^yoOG=KO3b2*(SOo86dJGnrXIo(Z}08?R-TX+NowH8gc2?z`U zTK@tO#U$r=KKgzFV3rL~sA*(Dzak#B=jUD=^-J6M6K>v&+Wz9>FuRDo-a|&z6@s~` zUV#VTPhb`(X}rGkBH;vT!q!bof=GPo_md7U)c`7kisMhnnvF)RD^|Zqn9c+P9o)=l z{`3sb87`^ayEVbhiTg|*L%+@2AaVQFt(w+W1M!w;s45=sjRMWkg^C_~l5~aY#+57F za>GYKPe`1dv5#hxKEQWRwXXN#^IpDl(GDyJu8RnrdDMDSkGrbxo_p{;>UHsfS(u< z-m>EDn~mv>lal&vx|vQUvApV1^qbE`mzF%1l$QRJ`RU4T85s=#sQ#g$5cFR(*X<_G z>G|+e8McnmAp6Fp2c8+#8d^b))^4*{;>^a;q7MLftjd-=u0Z_U6e zrC}0Erd&XWj~@la{cKs)r6m zCw1bG3SQK43ig(bVy9!$Xu%}DSj{vO$Uom<{u51k7yOEC0s4S*a66_mv z6i5CCJtZzf=!$};0RYy8?_6Zl;@>P~|CWOEcT_NFZaS@|59pIA+bmt1qc!Ka*@UxU zh7OO9a;9<@N7Q9b#rdZ7$@dJ~WrtQIVb8&LNMs((jR9e{k2jWENJ!{N;(^k`Mh}md z7x$kY1ct3aE`0ak@GCC`>NJ#0XecErUFk>ep<755Z}4v~f3qUeJMTey`VbiLTN~pF zMTLcF2?v5h&(wG+IvVh?-T(8mvF6k@^)Vb-;`jcbS3#QL8P#(BP*&8UyMcc*4cd&5 zy#ez`kDTaK>Y!U}Vq^?OZ!=UE#SnSn!qtwA8#gwfj6LR1R6PXHM)fuKo>EY~MbMx4 z^v^#0o1^-7rC{^EJXSYqZ*Mowvs}R}Vtg(yPaI*tR?MQZF6(ISrcIkDh{!N!=v)kF zEM7PJJz(|o^HaW)Yf;7gXh-}3^=pWvrggglnMD*P`>HOJsCe+g-QB$gou{;nFcUVH z{?Eq0T4v#2lD+@ySF!G>p@9M2eYyc0wK*3QHNiJ@7T8`)svQUZb}Y$4xBBFr_|Ba> zeUwjH+1P|2Sq%_=Wf-`mhJb*wH^4$vwt$;#Q6sex8wpMrEfCUwV1Ht3H@8eY`uWV7 zz8_!O7!C- z3=9mUE%N%(8hb-AXpx=h4;eRlPBkVehvZpSuMDGFLrIXZSMmQ;H&KZ)>^}5v^N-0g;k{ zgMz2CJ-(pbu!Jx{&D4HWc|7LPlP7zsH$KH$F?(*x?t@x#zn|4+cctCb#-`XpdhFf?(#$v(D}qIwdDtap|M(woYY;ii@;IMzeINKv~qTS zq;vhpv6bg==()wkbryY$jag5=PDq~H=fPc-JG`V0f>9NuE%i&nnLh!OS-FkxMP#Cx 
z$>knE3)C*Ulrb(eBt*M`Y(l+%y`)QDB~#qCZQF)0+ns&&5A} z{_`{Y9m=g!9SHk=h5Z+Vl4uTjBb}dsjhd)?@x>O#-yS1^?a!DQm+^BjG<0&x9r z_SN0{FBNS%hz1cPd1m&z0^5UoejARjTWu&@pv z3hehK_9C;KW3ttGV}wRR&PK$pic(w2z;KOyo9qc0a7N2?vZfB&5ag_Ww2zu#^RD1| z3G$K)pgim9lGS`E1eFEy!}@d4p_h<|-am0uwdx#uUAeUASN&o|SiDmekY}bvrGFIC z-G!K`*dqOg>gp9_iIM1-8u38U8@9?k`{$nx5Yty(k#Gt?CAzB=BwE$;m|H^xEzW9x ze}Ae(SsR87oOaCJW|C(~Mz{0(D*6)wKncYwE`j zb$9oI>%6bg6t5;S5F|d@VYK|gplRE`)}S@Xc{(JHej5og9Mn1R0hL#|=V#u88s4!F z-TUgrX8@Wsfo?&HMq~Zf$(^M&4YYVwkK)Y6eU{Ib_0VSbstVbEV zpO(f0KL>h?&Dl3@eD(zucNxtTZSPS4JqiC`u7~dJ<=DU`bq=`$fc6_YZX(C;-MsJ9 zXT1{u)ZzGP;W+R~>vM8)GW65wu%vXLUy5vi)kL9hg@IJ4X>O)RcdZ8H&&t|57!~AM zU!MiffY$c0?@+OLS#R=7OEcjG&>KYgvpqs0d3o(%nfJs6Ij=9IIj42L#39cNnH`MZ zcJw%|V2tpZmF+*}1DRXzyuL>r@6cT+ROa)Qj@za%ib#Wdcic)yNCF9)us5KrvAC6E z|FTn}?ynI?>)`y4u?R?zQQoUx+Ly1&5|@;cszWNGPTY^fyvMcwoQer{!fj?$x2Ssp zWLgr#7|8CP=Ec@mZA`K83=gBTfZti`ynip^0+(O5xkdQj(kJW3t`@0O? z?9}WmbH%HCaXp`^%HNanfF>q7i?770`oRnamusb_rnXf+mQn(N*#9Dy&%P})US)8@ zl~{hasea|1>JK;&_00VI_tVqYHM+Z3j;sMG2F0F(97|-i;bFjL)$sM_amHE(^4HaX z5^u}2b9TOD5jdx=V_xKFDCN?7Z0f0|y1G!|qpU3K`MPS?AJp5BsVvHoifql&k+Y{m zL$ND@hb3Pw+!1~U0!BfApYjY=f%wWJ2lzKb>QsB=4N)%`nmesr>-?@?A;-i}Pb* zFl$)jSOwea>gxVN^m?@RB30*|{vcPqmKvF`7Qyo|sHdxI00&goQ66%br&MS*QO5wx z1mkYryqS!B2tK9i>Y7(Gx(+${x&S|L7+QBO!~;vTwi`Ot_mu%=4(m}DjpW!gtQwkv zZ>7Eogn8v9!{(Zr8a0NaFd5|oQaSZlFZ-o*dtDn>qbl+4%zyk?6&NQ@wB&CSO)Hv| z`y!Zn-P?!y0%@Jwg$9XXc}P0d1k zr!(>AorMkz5K9R2d#NP%@Z{tq&vGXZ#I(pkB)}gXWXtGxpW-l7A19aI?ydTu~fJNXovA+tb1IOxE(U{9-3Fk z!I}{YdEkO*-&5aR=u6{sTwvG$x=2E(%rRLP(+-+yQ|E%Tguu>p{L|!Q&UApGrhuJN zz?Q9BhjObHH*+xY3lM?@vJ??H0D9gumsxiNqUUw$%u^m-BwWBmUG|;5V(~kU)}y}f z=;oEI9rY-)q`>ZN^If1IdzB z$=o*+68iBuTz!0wvYIPmGZ8-Z=U1(veg2_BLqkJ+_wEK@p9p}xB)Es*fhua$J^-Hs zkv_pN)B;@q>{B62?2(sGMgB?H9Bfp(^kXQD2J&<<8Csikd%-M5i$bap?>|8shvT-*+$u)$k1<<){Mw z5&zsMYF)8m#aSP{{I6g3@*+hG ze}4ASy4?`cUmX<{)lIitKO2Mot>N7Nj5!QGVYAD$5%UdVGB(jadL7wCbpY}2C5eb#f# za}dX&Xwuq3Eh45?@0Gmq;}a9o?d|aKpP z2)pKp7r^Z>K!9 
zIR5o43x8br^T+Mu3;M7E^xwjd#NrQpm*+19SO@952Fae`I43*1NMvGU!Ozd{(%IXB zU^9UJMr;O*W%olt{8@wPrM1PIwCPD%a2>XI2gk`<2!=&D|KP!h>oGVuI4@`^kc51@ z87M8N@DJK&3Cjb{x4NlmBZ!hF-SIfTe>F(PMwlxz((K*epe})eQUPoxyfG(gdmZ3w zA^R2*^7OV5|C+JG-jjA=5w2McRIOI?KDY((KWBb?iGQHFibCvk4k|T32eH?$Ul;4y z)>4?A_{kcjCDX_Z6j_GB!_AxuXRdVQQ7FW#h+dfp%0!d};t7uV0-Fo5)j`t;kI^2u znVgSa^{>bA*GY(4+>o6?=aiZTy}Y_D<>gvL5qZhG3ckUp8-gP8fEU+Ak6(cJv_G#G zM4S4ni_ui8g1%=0!RWt!fG^Y?nN8pJb=48yx00#TUwrjOlyVEm@XiD z58@`TsHn~Q#B!F47(zQ`(fdbk;ZH)MqP)pRIbLm1EiW9Tog#Utm z{hwT+-7dX#(A23oVt|{0jPxs=t_doksgpS}hJDry>}|_7VSxMqgIek7CX^EN`1J&7 z`$z)KFram)!wF?<#2dN&_+lvK)G6N4lU9?4-T@xXI4-l8n3$=VnL5H@yf=a<{i*4m zn4kr0b2V$%So3khK4=A7N$~wUKnT2wipP_)Q5ic+T<-bH-9!0+qJ5l4XaH-o$Nl44 zG!$_|f0C9Spu+Fq=SPjGZD?4d|KL!wwZY$YNUZUAMYVh=Xiza!#cTGVku%JB2fq(? zpN&*0WS;3(p3G=0R81e=g1NN|CMhV{@j2I6@Obb>=$=8m894l@t5X9)7H;imYkLkk zR((v#|JNo^fU59C_V@Qw`1tsU5fC^Fc#j*X@@lG8E7)tI1t)(wc^Oxnl_oV>(zfv#%%VWD@G@wipKOM!V*OSb{N?lv>Af{j zdtE9-tTkXi)QBg*+WKmGF}k2=@M{xcLZ4YL4EAR=lnNaxd4#WwbCrlI$L4u;6jP{*9=YdK-j>^8ch-V)J zouZC*w|{grA-$M%{3GR$A3wHaO9DX%s=*{#L`aARen?u|-KXDgdb%3QFS_rm0+9Yv z#bzdYj&5p#mBKxrfOAd$w|>Y#RZe=k`_z_G{)t? 
zyJbrP66abk{Q8!G;6TO@uHpnaP6LzdGmvoxIb8UenRzZ2mI#!&B;{QUE3yvq2r zAICC(v!D3T3xw00ZL$mOOAP`E-V0Z6?|MY(IYbdRyw|o{DX!31iQ5>NgLoOSAnP}5 z(0zKw&4207Pon-~AHs1)c_k(F9OOxHLZ)GAEI$3=*2FB~^CGog-CvFb1N%0#1ho4W z-?p{I*FS$g{DcGc>M(DL)@><=l=d=ID*Egm(CrTqp$jg6+zxP{_$Pt~azKPIwtt(N z+El0yVU7LkMaq_+lq!0fDAp&zcYvFpDhle=c4c|1F zViWcwj7z0CaCEDwz<0hZq|t*Ap|je{aC%#ikc2*L%s&Hox7`^6VAA`R7QM>5D4wnL z^%@faSU%l3f5)qxviWvRoDVUOLYTc4m^V#8Eto#6s;ZI#joi^g-_w+o#C>U8l{mG2 zAG|K|QokJCYzsD4C?FBbmxjZC1t|1r-s{?0)wg%cI=F0qV8w)`B_%gsdXAHiPM_`} zo)KP514jpkhflUtXQ4ju6yZ^itvX2MpR{)FL` zvDM4NLqB5|S_I`ppyQ}w0dJ1>ckiyj(K&Kv$rh#$0WVKagLyd_$99#>i|HxtnMn?WJZA;6fRvc}e-ck>{xw0>N z*XTVoVB_Kv%jcr|`QKhIlJ+5JT|x&XXCvTzE5DmE08fN~9ecT?y?=i-D3;+M04g@DK>U)k zom~X>)!ylMLFt=aNy9N zR5#0LC%Ub}k7lJlBX_%cc|AwjY0czfW)7$0ko#zV;Swl%db>AwbjuXbgCRpSfNyR+ zt=|*)YhVqj5@61WmQf$QB?PZr3-^4`pC31hYZ+e19jZL^xh2ix!%0(9Hj)N`QH356 zFbQ>Y0OK&Fy)k8|>*vs%^MFY9^u@poaakh4roi_U85O15-Hymo%caKosm586X7;dx zr$<=stOnj!ee7az?kQ=asJPBe z+g2A$^px0u+?8e16_h6=N?f8+kh#qSjEfM|K7DqOr!4mqn{N~it)W00f0$+;O1M9` zXOAxaB=Yd7b`eO2Ys0T{Nmz$!q&rd0p|!Ne;h?o83Lw}B#ztN~zGXwTBpPJT$@eD7 z*;os-YH2Drk+)DC1|Y&GB(UDWn_ID6c=1AuNT)x)_C?(7U8*!!S5r$u5Jz6T$WR9< zgqTuOL91agtFf-cxez7?s7)Gi0!$G$LLxihibwQjel%IJdGqGC`8Jx(F#LVc2O|ESdp)v{OKJ>TOMdUwJn zqgsMo`8NOK)UpHeV-YdXG@^r$@Ep}CY8N2KGbQ-n;mQ;h;Zp+A6j-TF@oaX3D7Oy` zHeuu`Tlm()W4sml&QObjL4pQwk81Kc)5K%@>vQ@&MmcsLIGM4RJ3NX1e+@)g+Oe}| z&IrDC2}G|_{UH&^0aF!mDkJ`AfG}K@Dn3ns4#fA=w_6JAn|Ygk^BUmX!E4mQ)1Leu z1Qh%%@k@#>y$X~q-z)46YZ1KFo^yQ1bo2@0&|Ef3Tu=3Y#yp(gY^;Y}TN8j=_$nWR8s4%-4h!-_0{u4J2-JWeZo^KjIeW)wXF&b5XM zmbSrJo57yU2>{@LD+W1+WK2l%ry{UPVtJn8U_U!zj*^`Vwb|%^+e0<1o?ptf}cpvdF0sT1s_d9N6W}tv7es#aP~_fXnhN*mu$- z!Vl@bBuN_?CLg0R%91N71g0*VI2CqF3do%3_5Ore7UE5Uc*v9C2L=s{=gIO!AYdEp zGwsW@n6&#sVU+W?g^(%GeI3~M?CiLSCC7V7w4|(i{rAj%?Yj&P0`k)eB=#rn!?gGT zp|Uqn%IVqJk0rUCWP%A7Tn0QxD#=e#rGx2J+z0V;ND=gLcuI}C(G&_ASNQ3|R7?U9 zh4D28(nv`xkAD93+Cg!<`I_$rLhbwsj_$o<`D_dfd~#(`F)?(Gtr_RR{iFbD%l2+` zX6O;1q$Jj6Hdgu1r4;=0v@ksf>77QyP-54vS{yE>VjElAH1R|oWd=VN6Y_>w6<}F1 
z(imL!MSELIxsMB(yu6~uSI(Jr3jIUyh*xWdE|!8~>EslR5VYIs((bVX#xIAJ2O#80 zz{rn&+SoSQ`d&y)ZLMJrKpeihpa08JsT^>6GjqpV^kWHVOTqO`<6pmC25t~;eR-=e zX}`6ql%4iIlmq7VN_}Xe(b$?NW8(8vus7@#>!Q?n&!AV*(D{kxeOmhtqUGWnhr9Qh zLJk2ZULFtEUc-XMJz ztVi5((f)yBZ)YidRdKr2`^-oI$hhtGk<%7g=K9E)72kf=V5s`IR4C<-d;f=v&=g@%z zS2rFPl_{X6peD&4>Cquz@CrV);8B^UPoZXl{KM|e@;uH?)jVN{2}MwH`Z|l?jWN}; zIcVYA8sH&f14iaZMT~(G-(TF*#t7e9b3l_5){d5Dw+UX?)vSQ#XdOupZY#1`( zeYp(viwv+R0|I~h;M123ZxaT4Ht6Z=d*rc%k#60-w1A#AF5d36W;Tg1(R>@jHYql0 zx9pQ?<7e+Mz~6Z^%iO!e#>>I_{`uDupvDZRdT(648jRd+^|iNxn3OOhP8iKSva$ns zvzIcZ`^G)46rB2d&j1tM{}6ijK|oU50>HPZ@Mm7)RAfV*NO?8wXFx$$K@ z@8t){`OiTulAD69L1`ku*D`1#;eRJ0uj05Z4e8Dp- zn}M29x-nsy>zqzBpEdl;dTu>->=>18nA_ct5IB+nO@pn*IoR3N(y87SqDiQJ_3B{q zDjW1RgzUf=MYaycdMZ;m+~FrvhtjEH0yB+A>&qQmg^!8EQoUex>;r_Gn02oOM}l<@ zenYZNDyB_>=g}La`0UJjSDJdd!4Q*BZyy}q$`6Yr81pl;<6FRqlCgLs;pIuXGVpMO z_&{6yz^I3sM_BlP&h;A(^zJrD83{7Zw=4@UlcHsQ*5#u3@XP;(zCK|XO;hc9@7T8Q1DUiWPWBU-%}UO@t8l^Us>4T(5%M0;)>fu(|lzY=(grM5!-@F7wGtwAhV$3%K;QljW+XwmT)O@BjF09~VK?D8f>7GC@xR@VST2X*3>ZZ_e)@;F-z{QS}EvYWxc&s92O z{(~g>WQN~(q!iVO9k=bu@wCBo51_JH;`2K`$9Na5VnuZBQP5JPO+d@0k_Z)nOf)$b z3Gm5Ghi1xadU<(~6@iR-Jb3{OXG)?xkwW7zVv`CsQ}%*B2b4jqlpL&9qGA_;jiNSh z+r+{mAV!Q>^(BdhxSjyEZo=`z{{TNr$bRKg;{u)Ku~U4!{IfqCs|$cNtPxdgW06$n zh^e^|RFvDdL3JsI7PVvr@K@-9n>&lwF|1j$2Bfm!om^1gHIST{zF=DJN7ex?$~l}Y zn$4SWfr5~hTlAhL8(MaRE@%5X=lC|Jy3!D#PL1)f_7w}lmD!%X0nQ6IURc!#;jl?r+ zixo63_kLtLW)~>NNQP8UT?CIEAyxvkS;vxl@ssh}`6C19=DXjhF1GV6hl&^cF$AdG282#<8RXCA7Z_&%2@bP_+vI!M=780b;fl~x^aNkc@1~qN zdsYR*?Fz7xxpVX&`!r)+NE?cwHI{(AVi5KLZ!hNDE7F%y;@`eK1@o$yaaL4RBAMv&LAQB;fR&0o))qK*ztP20}DSa(5Dw6EKTm?+?-Tu_~>h32H@YejYnL36Dkh#@pSmUa2v+?(PfqPjf<#}*r|g3?ebS}vx5k$ z7ZnBkEV=Y$?VAs;-TgSn;M)Yr1J^)`${dzX7wD<*^anNwI8XfNB;j+oJ~X>73$8EX z`eBv#7J*w<7hr->%$yxRM1}#;#j9&J3TD{+Alajt8Z8x%shG#$S_%>Z1-&}RKo^(G zm}*^nHI}S6OuDHkqr`Q9Q6^sdAf&#OccqXMn1R)ew0h;B+>qO7&_be@Yzrc-w^T98lKpmw67p);i5nRif+RM{%_n$CD`Y3=b)n(mgD8TgiMJjCc#I<2!EkO9yLKRr zjNS$Q`EiukPsq^Z;ls}n;omTyMnz5i(!y`|?%iRtp!CRLu>|c%4hy;L1>{-H>(}eS 
z)Zo&O6?j;_NY%E|crV680OrWbqnim$r#n#nKRrx#(9$AqIbnv9GHi>JW_a@o8Lq@i z+V;$1E2w^u`xeGRshsf>A>ftBJYd;-7&8OrXLP&jQSOllR#hKwsjjYe{DaSM)Ejh#1?}&ie#Nd@GD`yl^CV^NJ9fGwFiz2SYM*S=Y{#3Z0u6{w+TA|$gD_|O~vK_9)@&nmAL5ODJExnya{zK z`0Cb`DcA{{y#Put;vq7fl*C595f2Wy4&?|>1A{mffo&AnSk_IZEOm#4hi6JZ4Z>an z{@h*}DQHtj@0IfzaUEPIPK?>1R1}~aHZoEKF3pFV3YB!{vIyb=^ zT0iso9aEzPCPkijzl_^y#tgfFQK?dwNy%vr+#n&MwGv|zOoWnH>cJQSC_Tr-+X#dP zj0nT2Yx46s?)CxS@VZzLT^<>}BK8(sLc(}m5vqlhiB8Yr!uKN}|Bi`Et3a;AbpEX| zZ^(X0?HR)8+Yj{RqWXv?s31e|bx4q{rq`|RkK3@^`h*z+5>VCFe#PA7@RdJHhEQ^K z&_>oY7Y+Z^ z&z~%wYQjVP{pUdLvf$-j9!^N{^mL!~(wC77Nl#C&x7Mj|Y&;-d72u6W4?{^v0UI#K zV8D}{m35K)686}@#14o98l~k5jq5?~XS0n!^(^+Y_-bi;KTq=GF>kl8IhC1Mz}-u$8R@R=6YYqirjX_}SwHBS9~Ej1@IW|?JZn9=`tsDw8~WitI!zgPQ9n`EDP$#UW|SwjCCHkA$kdXH?SK6GHWf8 zFEQegE&#g#xmcBWVbCkXE64@+p~%~Z{DuF(WJhisL9HiOnUGl@BsM{#Y*u7h3GZ=Z zxcJw>a@0*{$cI?CfEXOa@;$)>n>p;-@Q2KeZpSq+AQe^M{3fm+T!XUp0z6>?=JeDuB?Gqd`Z$Vq)z6oNbBl`r` z2e}I-+gO4)@Bo2Tut`$ITqt3+#IpobB(a-9Ck};hfyT2L=;F|wJ@mNiK-Tdw673V* zKSIW4aaiflV@JS+L<$n}gcV?zthR;_OopINBZ$!q_8c^C4OlaB9~kt|gxj}kL41?C z0-B&1rq4&(s>C6Oi|$rocHtT7(z0X_xWI~TPYdb*-T+S%j?$KlMX+zr-hxFF(4u{x zX9WuaD+!Ascq%b3NewzL0?af}VJ%WV#8@&Q-<)$?KK`rv%an(Zq>Ibfm0M!Nq=qncw(JrLx(%KdB(yK5L&)F0nd|R}2=6>C z8L_bHTtt!~*SNqqm0?MWG|!f5Ts(jlw#`~iZII0KFIC}!37n~f=zaHBfVIcXG07@j zaYFj|2Fa8&@s@v~VDnKju$iFXTcFZ~>6${u4Z78`ZiArmlKHhn60zipwy7UKZixo=r|5~|%s|Lw1AAP_!Fw#`ec7wJRVL;{1r8^4G$5=-8}Vwm8V+f)0G?UAtSn zicHCn|NaDRvYYO_pc$lporJ!-f1A}vN!P#i{_jsM|FdNext!8be(isJkNoF+@&DJ) z{{Q&e$^U%*|3e4la#{WRkN?}3bp{v`&rPfr{&FwO=C=3yeV-G^ALe$ng^u!Z{yg%l z|12yNa?sNMY51=jLMarqYDVQDZ(xw=9bkepBu~U6!s$~4@;*rmDC~svg7&Kj5wZgg5r;mOxuH!_;mA!CeyP)7&kW=dDJaBKE8}UTL-Ggve2Zf4Zdpa1P zMlzlCETDa%MeQFR4uj{{IpW$^!afiW5)=XAL&0rY7g)q%K$Q_1X(?*t+<)9UxtvIF zVO|z4#1a=5r%>)ccrXZXy%uAJ$QZ;_O_C{Kegha2WHgQs+A$@U;{#!Mf{&q`7HfzEMl=bh_(EtTYxV?ikAszcV;;0L;L z-oXosD^blnyWof?_gF#^u7%?b*l`qE3h7}A75_KDR=!|ekpd+WUVk;J!PF_yNzHc`_=8^EB-p5)wiWCk)sGGFpbWC$j`-k~|dU$b4k~ 
z#KZ>fp&!JS(Nc!Kj7%ZHO?Ck=+(|CiI(zlbGBQ-+n0J9F9*d+FAr385JOQ5`eVGoNGNp;*Zf;6IU8M zvFBrB!7Z|r+buA(aB%pnni}<`e|g&F^LXD7r#cvzlwyfPV;ZCJ8%2B#A{DoofST8w z_~Gfv7&LtZ(SubX7YdO}%#ee~Xd93ofPXk3Q!+@TE^mKu)7EHyaRkp92&*H_5s-#7 zHiUek4$}h|Au^~qfXPsDwI3Q*BC7!=3ii;o5MlxW+gTN)1h)dNFCtf5ikKCz2eG6H z7Qeem*&7#rga9H~BKUDJ+-gjxSz^WwUKboTdgNUd{tX-B6 zM~Nv2|mXD0z~>GqFCWz;8CK%7*h7Z)@hV$ehFZi#CGrxcR%(L10k3Q0EAE*lnKNnv$3&J*Ct_DK-Bqh z2Y_WFwK(qP%_6A)R=~`X?gZ_fq`m@bp@*BL4=R??z9&-+jEpLWzK6%abO%LSFi?mO z^k&YSp>2m3Bo!=VMXf+X$+NcD3zkBDP$7=G`TF|i`;OcGp}s?_rl}D1034i#+JQP{ zum|WJQTH7#+(f8=AlcVKeFg|Cj-!HO3tu!4l*KQGO)Rb6;xyz$h$HyeC%764|NNQ{6%Y7{ zU_2hIO}ypZ9h^KogtYa+!?|YI4ZfKG+qNSXAk^#sZTw;5;y`EYW7=8xGiewYKubvh z4Hq^G1AH!8T{Y&)3J(tacpcE_q(?=~3D+W8Qp7i1JOX|g7m_WFo?%(feon^O*}1df z?mXMVV`mUp=@~&Z2KxtI+|hyfL}3242Ukc+)pE=K%|+1MlaM()&@+}QdK>3R6m zWP}B7jg`3Gt#iK*_iRlEpDyTk02M>xLBv%Vc8S;Z^%n|Iw>!iH2M2rm=D@6T%MU0P z2HKcpJWYYR6^Luzo(2XX#aUIQ1^}{>5vBu%NX>Z*GOIZ$-8)luTOI;>nnDTGwOaTW z1v!D|+WqKRMvgl4Z1RjVwp%jHmjl|M`7i|_B+q;S(-gh8kX$gT8_+rzrPh%FzM`^T z3~o7%-O%x@ox;nT%p~Jd-@IiLs`u#97MybY5O6WI2pgUwxb;$ibYXRH;!oX-jf|Aa zw77mA9}t-pcO@CowOjydAb)1f_eI|zJhjr#Idh|Wt(Da!*xP2|`$$vqUJpA1KZs6? 
zm40YOD=EMS?&E3A-=YftJu8?#&9XawGGu_~uU+InS$$^Z>d!uoj>)^O0J4QF?zCFe z^LgXhC*d$^uE6d$YyG6~*GNwy!2J;9HuwsC@ZLYy`J#SUwJOCH$zLg47 zGwf1^f!a0s_8uf(;2D;N&wdqCe$546>hV|Wz@uoHY^s2 z%QrtFnPB{20z*z;hT`BvCNlt!tq%N#fDC!buAKc4+-?1`2w3%grntj&#&%o>C{b=M zdFB8y@tH-rr3qiHjXUxsr4)Dv|7)0oNjqO*ekgBWHkj@cI#T}T3eU}gvu-SYRa0Z) z^hjW?pc1q3f&@{m02t_L+_Vx?~pn&njSsWKh0Hy*4nchY2(_m5$#3$mW^Lq88% zPiBL97j4+l?pm4FxU<1UYHKN}uIJsV_S_Ux{osc5?6kGX9}u`0jI1Qe(E#^2OxI?CubETSBanmcZg|wd*|Z&!^hLy=$Nx~)DW zhC+thx1(*jdB?jK-pGKQ#s2GDG2v5L;1W&r(2Rz}J?V!{559UZah-}Z{v`#Rj8O>*{FGDUzQ zU)%PgR)AY12kC4qZ(A?>^%%$SbA2>|KjC0S`T~kw2{yl^Pdv)N&kE+8aTRn&103Vzp#uEIC%P^E9NyCo4=8rD zh2wMZM_R(S7j;dFyH5;!5@)&D;g0%26(w}ykK$h-d81BDX4>pE66z>+)~h`Yfal6; zzD1Cl!82$o`LUsodZLfZ#xj-!;JAj`r014Z-0ea(0@YJEbX5q8>FfTWcBp>s60w=2 z(_qm2Lch#(e6>jq#vAhEzP4wz8x|eFK0s?Z7u*eG(!AHe22%8Y?U}-3KAxq6Kv6?c z`%B5l=oGA$mF0loM+?LR6(59ALPNNs`2|QTLT}J4Hw}$gYz#H`40yJ2k-QM96*h^a zCwI{TNp|tb(LWyqNOT9rr$lbzw=FscQP!1!on>K=Qa(en-!?lJhYY%*ocverKuGuP z+b0*lW;!?0J;YajbOJ~uX&*wdk8EQy6Hi1asMitacK$+I0mTd(13zo&rAwEVE~3^~{3hk-hz|SE@;8!&-bT7qi5x+Z1Ve_@S-EkW zA;JqDCkV*0vX&!^%Zs-I^lwj`CRyY~ZGKa3s&vp?g=t)%IaBX3gTa)=P6#oliqg zAKNZYXk`yNk*KQyve0R$u}5;ZH-x{xW4#4w5Mk*TmpBV7+xspA`{4tWLJR|718Jnf zy6fbDPv+MNXXpsY$gn|x_ZW2M9Vonj!AaqoX=Z6m-6&2b@a(zUrOdGBZCXgGk;etluu>x<+0sW<5At2~;K*dWtsaDdGVg0OyCF9c1fHu(m@S$Is+y_v2}V>m_s5iM@v?zGy&Bd zxHle|D)jM@;m?o=gmq+0a2kJl&|f3Zzt>>IdKzvmIQ}{lmR;U?myndK6U>6;WV}@i&a&#A%*sB?PLc2(=_Xnw-ER z?w%_m`CYji=_`A|*jKdY*du`>Zw%ZSybv1z4wFW;%8>UZV*Pf|kHimIBI@ByGHh(*7OAt07yIP@3}(YT(E{2pF_+HJwgu>cix z4U(h3GG39CFNk;HhAC-V?nt3H2*8Of(=UC=fh?9xZs0^g3KO#CT)cQOwZ9-OMf*xA z1Qa-r*WncFn6;rg9>J}Ob{f=jG-(I1_IN^qB>HjDd(8nR8irRh@Q&pB(if<7Bg`RZ zoY1YK78ppL0;?995P_TGDUilR2fNIJh7jP6{dj+v6ox1t*x-C_kYb92F9FXVodpWi zMjZEWy-t+0Kzc^`;eQw$`kw-m!qEX9vVcK|2fFU^X#1bS66kf*s?;CK(_gTe^`EoA z{+xpxoMF8!}!BQxBr5qJJls|tihfZ z12k=OLi>ZT`my{ON%S(m4*eI9@@G2C)%q}W2@Z(GXn~_XIk@ZEqyFl7Nk6|}tpM1r z*N3M4y|MoN>!54RKeJ!{{aZAt!r}k_crv@C@SMOu$a!6ZwM0JyO~n6GQc`jaPLx=_ 
ze9N&aOi^)Jb=lhkcP>a0y0dOqp>ZGZCCY7f@rweD`Fx|B!?+ZVWP~kn6+hhAT%ZDH z31TOp$Hlat8$J8zGZ;Lf`VcRH%t5Bwb!23uT>X`RyxEq*-`=j~V=p!v7qS3i6U7Td z(Ix77p+);-NF5D;QXm@Sv-rT>qq~~WT0(>fHb&cnOn5o$0g{RXcN`9cbl<@VhuB$| z7m@uQq2qvs@+bchKR;sRxOdKYbn*qlWZ^~0D5F(qfxh-7StB{{#P{qi+dXuK*{^gw z_dR>z!c6?cf(r4b%a<=J?|LbbkC9JEi(g@O6uy!*0b@Qzf`^WtV1RF&=POq(y?YeG zftlWYmN%>)2!QCpHr;!(sqC_G!iygZ8@@~DjA!+pj1fm=LOwUBG>D0Z-zzEGao~2B zS_l^^o$9&%R{QHJ56$oW{r(p&Y`z*d&#B`Cx;ilX#2DS=bdo@gzFGG{H2I{*+`|5& za{Liq6O~j189ioqMZ*Sw0oWNC_mLB~HH~DT@f3LnzC?B(f*a^dP|&Nre;V8Fu(xLb z-qI&SS{;n(ClDy{+S#>e^*mPNlbQ%L$V=?eyJS_0$cLL_VEYC;JN1@r zudhKu^#OOND2N4bx zrznUxJ>;?30>>}jXz9V-OtVp-61Rp`4)PWq)adBD<351C>!#sTg7S$2s}hzKDrNkd z!=q8KF1E!7P3B~Z&Acq@^|!1(gN>x4r$;?Fboc-H**X={1G@wFs`6otsfDIZ$j)Mu zhL(8V1li3&@c+GgRc5QU{ah0jB6k!eeWKPEq#L_mL+9wMrXT5x^ZXK*HddO_0@{nD z??=qX{)ZBfyo{C~8(j{a$hDq*g#%=_kGlg?wJkC ztC!7NlD_?TTRi%HUTVpUGsymDsd)>dTE^<22<>509wL%oKSNIrq^-K+@zPthy|={x!Al`}ZfP z@#uUZCcCVfC6#FXrKM1v`GBAZ0P3~-5;}-co6#&hlh&Gq6-naZ?IOa$Gyws@|0&+y zOITkpBC7L8k>*&1g$-0da0H=AF)9-l2AU4C2s?EAm*K=Q+D(t~)}<`Z4wdSoa778F zu^^!6kRqHwMrAMDg=?9VLDa$zCWLIRP;%2SD_jlQI46GN&x3Lyw_!w{G<6)}%dh$# zcM!K`)7TOitmTd3r*`5q06v~8e(VO~9L7v>0UMUe+W=W94R=A*PXvkc&!i;cc0pTG z;%J!L0pYZA>Qz`2;wZC1E(^PrVhm46I0k_EJhZQbr~&zam|YI}8(@q8kSXL_VwgJ@ zWeO}7NV9VL_H938Kwb5>&XQ`}w|HRfn^uLpRo`V;i?D%fkZ_V{6xj@gf07CVGZ)!Mkxd)nLv z@6(oc+duez7x41*Z2NNXD0HQyV8&|DAT(rv)OF|eqsQEif*Ufl@@hG-1>kSI4OgCwUcShnB{ zl2c7~&iK;uo@@wqr-FV6{3mokSc>%XNmG*;knO~7G^fi~C3vA96=r?2Xub-TE;d#S znQ`*%7dW`2-Xyayymgx0Qu|Ob%T|e;7D0BBHw)D7P(c+#{Z0;z*>9>(!fS+Jd1w{o%rs$3^3mk#h*9Ncyqjn*gwsL>oAB;@jig8C^g0IGFT?(9?(hm9dk zlS@J&@Q?WhHOIr4Gwt0tXf|jGtkrrZvR$7?$P_^yurbsq{H)Pp$Y;^KLeI~ikH&so zRfc!N!a#5?FnZ1WB3T_`{@O%UIEv!}F2Bazd}RK-!b$Jp;0323Z3y||K~X5L)IkkT za!SOzC)ja_2oNJ^;*zWZnl!SAZ?yO4x%KrFULWsI{9?krcw?}{un(NhYXK>OdN*l2 zkykh~U0VHxkj3w!-UJ<;k+X2=u2Jo!RCUodHk=8VkxqYn*^PQ>b8~Y?-H>X+^y~v3 zJ0;JQz@z3!!b<|{^?UT_k@vR)cOI0wFP_{CWg(EThFP~Rpqhl;&NeJEcR$ARVc?)R zM4Iq^e1_YPCM5(N`~jy{Y$ukopC4R7J0m+JT^%4MvJ9lATr7TiC|Ptud@Y651ZoBm 
z#P!>w`0)hJAfhskRj+o!`3h7msgeN*Lu^U&>eyUUQ&JM352P0(dn{0pD8@*1NJ=!q zExuowDN9pg(6SbO@ggUxLkbSydM>U!sDR@dM7?JygP&m+!ETlhFa!9_fH?F+VN1*_(1*`*J3#3H$7W&h|DsaIsw=y5FsTle zzgdAVPBx6eCe$0y7E<>%Dj)V`;w`u&;HAZb(HATX)OwiEGYQ^g)O>ukB%umhsHVW@ zDD^-Vjlal7^8n5CAf^fL_YQ;-K|`R}Y9OeGf%3+Jt0^gSQ2=0X`cs(?zGL_ErjhKj zo^0|k08YTpJni^`-A{rIa5Z7b@^Cp=f62kQJ`Gq)TVH=NP%*G~Pf_xd&m2|%Bw;}& zK|)6qx{q;W0QnLbiW=(-vBhXfHf$;c4-6s12|NMV)oDTs3n&$q!qOz-1UpCYiKnl? zxLrH{LAuYdkl@E5ot>n*_Z?{gKX!MYhEk@Y+Y%FbKowsdj5t`Z+2Z^49YbhGAp0I< zh3w(`=X7^>SGH`%rX*XR=u$UW5!0jtP`YGKuyW-}z%1mPxUaoRWY!ulL290d8=}^JibesE3nn;q~?t87-^be?58*Iv*w1&!7ECqq)rX?X9dW zsri__1|OB_$|#f|bN>i76Y9j6oF}z;xK5M6VcGVqV1o!sWYDa0 z5SCe)!LHXl)GT=fRtD)pn<0YyJPuQ488qa2c!M1sFjF;6U08Fp~kG z4GV*vL7ovfU&dLEYtehm15@hQ;NM(&SqBaFD^U9#PKQwvp*1Q-sD!I=rLsCLpgU0`qnB^|AD(S9J@}7zC$? zbuYhxh*ics5vV&pkVz7@5+%Fh;%7k>5pW3fou*3FMl6c*ghT=C7Mn)@+p06~3ije~ zmTpuZ3q-v`ISMQn$dzLBKK{-UBpK|puLO+z6ac4-cOnHw7)TPC0MP5>H%W00cMM#| zfwe=epFUB4Bb{@QB1q1(vGlaGV8em!kidkjsHlzRjSYkb5I{_ZCWYj?h{eQ=L)1ZC z)$k!qu?r~Fd+=xz8c8-?4ONDnF%6Kwbwd^-kXK+9YA~+G?LeRnbA2$hl)8|p=L&Pt zPC^GqT$Y+^Iix=yQ3sNI0Ei8JFYvg2TwYK)NdfgkUXWA~u?i;QEC32{jJ5yF@jyF2 z`fF*^lNH^bJvTV6mh(a&QGz$zRv!X=2*80>#6ikMNG$=FQ{l#4cm+%-0e&Dq6Krk* z8T`8O5IV4EG!G<7fywZcSS}IfP4*&zOYm8b~Cg) zURC~fH2=zD6yhktN&w!F4IzrBktw{?2nw(o=s_TyH^s1Z2e!*&xD_B;B2qrPmLGG8 z8Y_@nwO2*mvDDZW7UvE?XDPxl@c@XRJ<8B)e8J;-W&Y?KL!GFz$>35-fJcL!W|DFB z7>a{%i@GI!y|=sj`pZqm#^K(UFsw+%PfBdEh6$XY-#xs}ic@H~(&pCa!mPnp@6BIA zVeLsHD{7JD8_ipHQR01$YwRG9NQL?-+}x2}U0sH-dq}}q70Ub$FBNBG9Qc9giK;T$ z1M{C~)^Lpcv%Xyg7mGQE5i+B4NDU~@q~qHLL00T`dkElvU>K)SY0Fjg3@(621o(2C zM$9M({W_!w=|P28%-QQ;J^#@gVD+K^F!Q`|Lspjm0(uDcN+`fmfU1E(C&OlkhW}>b zpP_O4KbUxhA9JrG0bR>t@ccREGMgRz+R=9KbZBVQrPx>_76T~l7W^8OPx`ttd}L|) z^IuurG?Qi!Lb8A%P&moZLz{T5L&*zMuSRIqoZ(K@nKO0PatU^c?Ck95YstxFl~4-J zXP2;n+PdPchvDlxS4V#0F}yI_x*|H(MHfV59m-^UKeG3ll{mTQd(AX7{2*!QU9qhm z&6C1F{nPVbAZKAe2%JsQ?ij3af}CRFHUf2YhRV%;3>cNsUr7SlVbl)YowGILyi*6_ zzbUY|bRDS(4A`-{ZQU>u|7{qdO!2R{LJchY?QgU|D9O*Wh9hd*ei`oh`c)_HE_kWR 
zZWz#7I#VYeo*wA*wdW!uqu!ujOe(S16$Ap-7zuUSjd{P-GLT#j{zyh3Deu1it03*0 z+nfhu%``ncyYR65xfRDO;@&$sR&Ru#Fk7YjulEblssCrqHB6Z?Y*jE#4ympGgyPWL zW$Pu`rp@4pq6^nND=7g)IOIDy!z?vy3B1@2#6p1($r_IN1FQA#RM_4{xNYCDnZ3tx zjjCfyDvJS(qU6yTb7_g1n(e(Q2g5869#ax%nrTtFWdF^yS2HG}o;pRnAX6+`t^S6N zR51+TwcoW^q~Ufi7zXo|)klu4Jklp|Y_wk=fH*M_)Nz9151zIRSr%iZo+09pUC`q^ zKwXe1X@gKQhFZO57@MdDeG;IFOK&Z-eD)i8j=7p@)~fNlr%s=Kf_sXEQ6C#LyP1_M zwq%#Y%90qArla55u+|;--;XXstf(3J>bPjqr|MXM?-0%KY23HNpXwv_fS62pxnTzg zbfVJyE*Caud47e-lgGWQ7vJUY1rMW*Ta5+@1J>1AbhsUnfnjI>H)<>P%F{ygW$#z6 zUfeJ%9M3>tosje^yi-&u5@`R!ag4lBd2>Ai(t|1E*CU!-p5hLgCL+Sc7(&B5)b#7>us*cF z=x-CCN6vIgs?2wl6F?J7ez5pI6j2#=cdpuFikZ-BE z98B6$&LUswNXrD*EobuvO~&}(AAy2UcN&Sh;2E}B@0J?w8zO!QRHP}!Bpw4ZKA^M+ zIl~EHk!Y?80~n!j<7iuj5L1NmfVyT*sRmtb|`4hRf<=E}~YMWIWckd27^qPjcO5pniNtV?44p zyy>tW_QC`#owjZaIPqpa-O5$SQZmHonH5$W*X^}cwJhC*#P##+vc@PxGF;M(U&V-W z^a7qKQ=$tws6+0T*NWxK%TnvhMlT)2_zI0@QEX^^d<+B^O&kdkKJ)juAI$_O$%HRD z-~#EDPB$=(3Oi}bu^Uh(vSC$foU*<{?qcB_8Jgv>Z1KUc1ls4zKW>OYQbV8)a-Zpd zXrT>IEX*;I5uhe1=CeotZbMw7qopw*kmNG!Cjt*?i8IgDcl)-xi1<$QZv66U8+^DM zTRG~_Rl(V_uOzu%|Mh&*Pd6X8$|>MoQf>upF+HM*w-Qy+GRJ8xTbJoj({>P+=p5Jw zSb!)+gxaWije;_FJShEx22&TMB4KdE;0L+yt_vuh*b({@9>i^`cd9m_U%2AKKPBD% zp2nr*R<5Ck5tY~0*HEg69HpiA%|fVrQLV_vb&R5^i0UCypW;8I(L4ZpYtzLiTZh}` zLco<-H2@`GP$A7>1(GK3EvdSA@o}^yfN}>i_Sr5O*_%%9J9yoP1|)}fw?B(mqcrryAu!@Nvo?PrR)cN!z)HF4fQn5q^3DoAdrY_SE4EO$5x&B5SN zKg5OnDnQTJdjk1RhqCiXv!#DBF(ccmR|{@ixZWau(NS>^bx7fa7k9>qOx{jtr@ zfWN=Q`6abKV6`|H0pzxx$I5LSiP!Txl2ow!F=Oaqv0j0U1+Q1^* z%n!TqYA0|bg}Zr9Q^C`W|&(gad zEa27t=4<45>oB@Y>bxF7$?j2C@KvA9;;?5&s$0*I^3~7rp*PlfJ;z)Up+<6ahTX*- zfWNfEp$Z_xg&=Wc*1N~T1lEtdu|4Yg8yPt&VjiGTH@4hirRoL)3P z4B$J%vr`&&MX{eG3Mpzkv0*AF>O=u8;e-TXdlHaDR&;cfkqV`JKi8p(l!15lO7lO5 zVdXSBp7piwGT-y5j-g?sB*w+`iL~iR#Mmv1Jg+n@^ceQqyG=&d)AP}5$=I`ITg~i0 zm;Ncws%L47`BHH#9~bHQ@pv?ZXB-f#IPcu`^Dx>KfT{1pfN4zs;X+EF4gC$nK(1Ge zD#QK#A%}>j4#Pu}4G+EID?5KZZ~NTBbPrxIL(YyBf(|+QQ?31{8`4}SJ|xx zMvLQAGi)zZRSt9?EcaeucDTR+S(TK6_q{P2i`k7oo6ma+8dCDnEjiI7YpNGVc$favF$mTKQdqmLk9j#i54 
z!mK@vaV@>FEUWDOyLXrp%9fZuHXnH&p3Qc(>%coh&ckYmhHWtM%7Z?eDo!kdEa=|o zyV|;bnqi9?m&zJou&fgK%Y4ot*g#In9$xcc3}s@?N^FyX`i%|BVeFc(7s+hhg!^D5 zirZ6i)f*QGxbjwG1doz!zHwfGqNk*4uUkxYEC2}k%y~zB0Tr06nCE5-q=kh++l2e- z@%aR}3eCyt&A=(Eb(iTf={OoId(70SkxO zdJb0`e=Ge)cPL@><-WZ&?QUoNwtc#fLtd=XaOc|B+-O8QNWFKhz7}wdZ{XCwH}zO* z`H72Wdds40-mI9kKv9v=P}^y{m)N~b#bG!xRE)T3xbd(vO;M)w&M!5Wq2O2 z+>C{I>+UObj8F!M_8n+k>dG!Wr~JM8BQ5}*^XOyW4)oBl{=$^a;MHt{YSucEe}e0bc*i?f$8oOBZ^qUCP;GE|0NVmukJ}VQfA-|I zFbs&YbEVgvvmBFPiO^y6n=-;d->>fvZ!zm=;pqGAIBgkc2(RWo2O%Mst&{ogy|J8U zm3i<(=k?6Sl!NBo)vs3q=E%v;4%?J@wAxDO-SyqGEw)ZxZY&tSMl16BAC{AB0n<1G z_xs*FeY)>*zxT%FdXXM+9r7ZLg&kLQsV+_emy#{V!Jac79v9>H6#C zsF2`*vrgsK@6C2T>=1`j`vx8-3ZdHPu5zcOP^sS*ZKY#nMd>wZU}Ny#&^knxsJ=8@k3t-?Gwy!V^n(Uh6?M z58i-G{OOy?6E|HUWIbh$Dss_=kA8%tXgajyI*+$EXF2hJ0JH7<19(3a^@k)n0caQv zc*Ma!C&rN%_u~k(!rXzCc^|d+Sy@?G5%r|dZUNhoXTqAgE@oSn@j=h}B>ky~#)4SG z`iHmY0FcDz`J#8Bvioi+mO`lW#I@pI$k$%pvQvM5Mi^CntyqH45&k}c`Tz8e|JnGX zK@TA1jD^-A2(OnMxr;Ba8*)oY?(>o3H)$4jkV)i5yXz$E_Gj4OERNUwoK#|(#R92o z3^q<)O_05`o>nR-f>^h&V*V`}I;DDMtnmQC(kCE#5d6(Vm!N$`EsrB?0*F{a z`g$9;5$VK{`+--`vbMf#`!g|qNA=Y+0L~?Vp5h4vSuYHZjurwS4rxd*nDLGk9VwXK zLbMI-PY}<$>Tk}zk(ntOVGte_#fC$PL{X}GsslbC`NBbRPE-UDS!%yJaSE4fB-M1X z&nM^WTvuYX6)gjhmTOF7!=Fh=OKbOP-vpp7h?+pI&m-cBJgfdS^SAY<=xPHn0y6?h zBEo3~Jp2|=z|`4nc~}UO!8K#pTZBe zTz;T)8hm4>&6>pJ z+mo!&k$F@r!Y3Qf={`j&8CNZ2J?iFtP`PM0*vRaEa&?vU>+u4Zq}PX_p4J zXZX&aI}xsI!ywi)2^wy@VE5@^Z-F+FL=%kp#Z3d+lZ1n;f({YbfK=-^@ z7Z~QuBcKDkBB-VJL3c+gLkexdcQu7)Mm^M_K;<@&PLxfD)_>zo1|;k2DMe=nnsh z``QFhf3fN6vLMY#WV>C5$*brfF!~;3RkogH7yar3*~bHo)V5I zy628(W-hQj+OrhllLI%fLS5e=G2oz0aXY8P{KsP7rA zG7sMVA}^N2$VJ;qI2qFT8ojn2K(dB$p7X!He>f8{pM-|EAP&UIi(JRvxbOL0mhNv5 z3d>iml7OEw2YGJ>P*_$EW8=w!SlG*3M*><>vZ|Mc5fDsl9$!&7YxGfTNEGEiohEedpSpk?1k~9k>e_ke(Az6K0)x7X4@9M;e7wa$O{sGT z5Ux;)aX~aj3$gk(hM&>99zaJhIosj%F~(`~YeEcHozXjvL<9HZhgi3QMTb|=t?Q_T z`{M}?fj?p;jU8`a7#zaMw1C|fs;itAM%#gVClUe#&>+xE4~!C|8X+1WW%K>}%Way_ 
zdO?nt--UF|*8z|s0Tt|6$>M-s3190Brm-p76-?;l@&2l8`1tu5T6P6FO+nq7ce>C5+?~DBVS4VeVng8{j5#{AoHVg}Tz>&3*6z74_ zgk6zTqWfo{1;EruI9_GLEwuPp&ObdBPZQGO7VHZQG?iGfLIhv!3;#L}g;d0ZhVr5f zHufQ}0*h1IUG{9NDcM%8-&E2fYF>*WyA45&x@ZwhJO;5Q>axYjyW${Qxeb)iH&1& z6*wgFt;v72@7U-6`Ellk`Uhep>?VS7z$q<0EDvRssX{c!aZPxQ!R1rL5LC^-zcTEF z4dkF(+(x-3#6dv1i_m@`aI)l`6F+}jV$Cjyw?iRJmclCsBA9Uaee1~Pf|>0r~*D5>&a%VhqzZ}^uAE(;V6j33TZ%yqPa3iotJ zo2CcKo!W8p#NQYG`>UY8a)wfXpUixSd5>l}cKj_!|{his8tZ{RWgeLun z+^=eTytG_&bY@aX&C&9ogGJj18>jgXZT=MhG3^XJq#aNG8PJ1w%e@Pi&Jn~o7Uov| z05>J{NQOufGyc0}oXS6GMMaw3{2&LEtPc8gJhD;3-hbPTN?xhfTy)(Ufgt?Q?>4F` zCP6`|N&83ED(twI$!a9hTTR^Rchl3Nm)G4tIlE>j|J7MR=8GmB+L&o<@iBDH!u8X3 z*-bW0@E&_!{O_%A!cyMmKH4%qCayN}d3ydp#r(Ws@WDv{21rIlzj*`fQhg4pUww#V zxHcK-m=~lil`2?0E9hn4=o8L{*^e%Srt2pvKfEHAx{o&5UE|ZDH+;Qjb;r%oPgdFF zo6xmlZ;IZheeW!eY+0UA=wg)L(I^nbrPc~c*6?NBILxYz(!yz@LR-P%RucO<$t#siV`@_{^nWFPQ&cShLYH{HN?qdNXpq&Z2YG3LkQO z7yni@Tdz#gROIx?)Wh4}y+RiCMI>B?}j0`l5X8~-=dnCy4P{1S$yB4S1zmPOSALxb9at)#cLX5W|U+1{GH zd~oRS2RRBa=G*ELInI5{d=hd0x}NLd<=+$v?&tEgn2TGtr|z?t@UO^64$m;u+y+VQ zrCeb$phDC0bd0>>a?F}JF8F%)QtCoN+_wk1fjlr^F9sPsUe%j_92x1SGpfxmvl0K3 zP!3ZzkF_#QGnoQ=lguA?^^zXXwDYF6qEAU@)wBS%NnEHrqyBOa)v`Fv#Z65Q4Svgq zs5T!Vm7m}7jD~<`4=N!ygWMV-tqLlmR%l@P%oL0fDH$VMIEnO6E=rVn?MV7EZ zyh<-BM8*$c3_-0GsVO9E?Yl1+rd)(`3P6ZJIDPQIiz4rI7bNFPh4XN`7NPd~h*7;z z*OB8c(*60SKFBx8tPtk#qyR#Y#Prs9gWm@ZA6B}vDiHOz(NhCN8GM=VSG3I+W@tW> zoZuYtJibH3V5lZpxz<`nCEHDqLnKO zjo89UjIH{IfTp-=tsl^xR*srF0Si!vDm7W!q7lHlyB9e+nGJ(WB8%v>*SpaP zI-NjFc&-eo?cwI;)00yL?_it&0 zY^DaHIhusxHaNM*lVoIcT9Ge3gZu{}y}!}rmIH_pG;4Ym{GJ%EBvpZdB|bk<|6->u zLaWRpr=>1$O29MLz;J?E;vjA+#l}}YHhKiz7DBtTMS(b}nziZZT#HU9pP^_lGh&y7 zb@<`~;c0>iby4el6!?#vS74oX`jJu$o01lfz}W481dYIEm9;s2+O`RkPySiv&NZl} zKM^-p#(B-$Sf|UAzIxNsk6Mj3OYUSiCuSJVnZ#wjgB?aZz?NW#!~8`^ZahpXdH%d7 zr3q>`A#74|)PWw7=uI*l#oH=H?M*UZR1^Acv52se;0aKap{0}s1v|CzQY#j?U~1n@ zFq(W2gNAX}17M*!mP2|o(k#EXDG4XaZ9{E@3{(#nrGt}`MSKTgXr%Lc**~)md^q<0 zqzBel-QHg_#}lFA7H(qxG@R$hx6G|OGEAnTqt6dg(FBx#R8_!%>m+c}kIz+GN}$U_ 
z%y5gG2qhtDqYqKk(Zm=4*7)Oiyl=0{$-KhxKkniMfBon^3&@Y=fMQckCfifgm+ejU zK!K;^xaev~Z0AS?J2 zeNZh!tgO_D4>TG3_Bf&l!TyXN-KFqtq2~rfX;6<#*%5MN5`F^jL19g1<{u~+gU96_ z^(MD2JZ7v|z1V_8Wi&FA!LB!2%;BR2Boy$SZe!{qBH#&+MFNCHtr7cJWKJC^05n>0 zo!4gj;2Vf}1b`b1knZ(oADDET^uHbWdT;HInPYO?ue?IF%ynck%)b}in_cJy=9KPtZqR0rX_;=cyNp#ugg?g24tF9m_xta2YvXy8wzP99<#R4^3=(lytPkUJ zh$;OQ!@{{XCtSCE*(~+|!vHJkPlLw=zve~zg}gOS&S17y3YuqMNQ#YHbIH^4!iWYR zztB~yI*AR@p`D>@C25h<&P9Y=yir;s!WHt|Kr2*%|FBuoy4sc}gImshEwx4@;hu*; z4$M9XVt?RVDL*FfK_c!o*y)cmN%IxShIOwcT8%w=DuGAQlx2`11oGNQJl;b0CAtg_ zsH^UJ$)H(b!M@uc{+eOUU2%6R-zd;kqJ)UAZWW0B@_eg`CsloQcdYl>AkNHOUO z$eC1fYXt=5($#Xt%zipdN)lHusgrb*?$TVIab(2)S;m@u`)^rAWmQ^>Kk8a}@!kxD zqfmrX2LYl}X8`*%gw`3%2W-P@3(tmBh+x?>1P&$%l{lR-z^N(!^=F&l@t=b;T~B@y zO{;tv&o01n+sxoYe0Z^@r2nH7K|XHzn>s0<90C-^XMdJ1dM+S&MnQ5-cd<{N)$s$- zAqTQU4!k~H>YQ;S?)A3j%Iv0$5&_}y_yeZSOtk6o&jMW=8T$mWbWQdAc%D^`3+`4e z>(YJ^Asv-&D3|PKMuSi;?%5e@6OU+3ZQrlD_`>dnc)^q*o zjZ0iR|4zZFNjRe)Ny@?W;pAy7ywG8#w(o-mjA+*9+8#o&AWpnIE>$x65k13^DA#pf z=nX(w76yFu&+c1=)V@st2E)P^Lv1r!>zb69NVhTSw~yyGs%gRrirYea_!tZVQTg}- zjo1bt7QID!2|u=4WrHi-9DDcfVhE~!YD(ZFv0GdUU`;7FYdS|zZoQB3NHk_|k~#$< zHBHdmeGp*H>Jd+gn7VB zGmFoQ*iSfxnTX%(-Xglf>BONG`u@simqc@zoz?l+`Dj#nS;R>N#nrRVRBkj~l;kS3 zr9x(|u_V8Uk@W6qsfL<3UUw!Q(5AN%rLAuT_(X)CSZ#CuCU7dypDkk_03H^J>MyhJ z_28DP%|4kpG3W?tojgWs1MWrLo6n(P1V5pHY6Xz6k9XhW$u!>=-NXZ5??OL7ZURU{ zF~=uB>xb(ut3cFP3AjcHbp_+ep{`!2)!R}>@G)S&uI$$k9D!I@d^v1!UGz_U1J3WA zJ5EPC#ptlvKH%b34mHmB6+bW^**Lfa^1vhyP6Bd+E}5AL>Qc)-d@`=zx|Fw0N`Xb; zOq!~pCT>CQduQ(kb7uu`#AtV2n0;PFa=u2m$eew3YA=T0OK!bi)NIOnltW{GUQI`%oWS!V{~tGE{@ zr@M!KjpteHxV7bj`}$3c)Q_}jm$D8vUJA9EB;%qZvr};CsPVoPPk2_O-n$*;BsQYy zcItG%#+U>iJp=Z0f3_~TUMI?LHz!PM7iu@3S^rg9DoBc&9A`VYJI91qMPIpWjxvsx z&JB8OYmG0wT%z|hJ)^k%-CRK~pj#`14dYNY^8=HnCqVtI)H4C}^aScaAP!myWsB2{ zg)z9evoU)#chN~ zR@c-T!SU0-k(ExHG2>eO8+;$@WPx>K);MJ*4HkO?Sb3$RGSt#WO|QQ`0tbWrq#oSU zAV8I5P>}A`ZuO(Xydb`dsULdh&Qupnswu`tCyPgP!X)5wpKGGV-=-;j%Ajt&M|}BG z70vv;p9A((zFKOq>3rtY`)vZk!Z8xR>o_BCI5iZrb8`u%e0X7(uA^b+tB>7o)9HhX 
zOi^_vZ;AlRCj8^hD60wXL|jX#5M`G-YOfh1wBrcBU)iEs?ct&C7p0`17o|=6(p6QY zs~dgug*Tsyknc1t1&J1UW%n_~>GmVF@{8TSZA|1@zojhDRQ37e|1N_&TZ(G#>GE4h zB+5x?UWs&NJANZwU~taP4Y7TzrMWUp1yrU<6q)RpH1omi%T9;(o!{wGzUQN2E^vYw zE&6lh+|}3ET)G%<_1!_jUtty@{($WQA0sXQ$S^?9ZDNPRqQ#H>WC2@NvvnLf=2K&* z_FY^W?ZKN@z$O6$!DoVIpUA4k{e*%5p8CBrGc9X#2b?4Ft{p}FQ&Xcu_$yDFiOr7o zZIhKNQr1fEI$n_r0>7(SI`97d5Ae=da=#gwHx0~%XeJcH)^HR{aMSX^cm(}9ydY3? z0ML;%0nKMyw#NPmvq?NdHC)4+mAkc~V%WIPXKePqk+r-1PncDwXWp9f>-Ax|6B!B|n$sjMu)3!U=FF{2 z)vKD-FS*lR(|I3v=w+74Pt5F-owDbqI%%ZONC?nz^VEH=k)|s#S6j!BD~vTk{gG}? zpVo^Aao!F75gwk+X7;=E&xEh#Z3*Rc3=6y6Z)F`3w)1bHE~lC>Ps(hXF8|3c^O7`y zyJvQUyXFOc{fH=KzODQbM_X!Sm8^b|#P^4%-|=l|n6s*4kHqu=K8_6CDTj38w3D;? zq&<#WEl+shb4AO|PENC8;m&MvmV#9Bk$p z#Z#nHAN?*wR^ioy9KS4Fk_c(caJ-G;d)pNkbFurkvtRpOr^zu;0+7ZQeHmVr)O~tx zaKVL2p0QCpK>oJWcWms!dU6&BQmH<^ocWx$#8g}xmq5LZ@$n)wiwR=%KB#VK-YS?t zjONh=L!$;I{UeRl{ExzK&WH$KD78%1#w{w~=N>belAUo#I4z1~*^R#%?Y){5ga%g3uapU3j2_^h zFFznGUQv5u_?q&9=#^XJCFfU#i})R}ERyV%SFRlpUMh7oJTcL9$C-eLG{u-dGOX^C zXJj4QV0KdX5+4U6HQi$QcV%|}=BGa_*O)kmi>MfA@5xE$AJpPlF=uB~Vz_@&9mfXu zt@q1%4oV9zuXO#WSjjEw$Uaq}+*Q(dZCR19sb;{Rn+q0=e|(|Un*`L&fc^W~O%{T@ zGR8p|Lmmiog-} zmoavy1v9g>*cwvRj_@y37F>?}@rY_>grbu`)!vR1XX-!Q>9=}kq$K6_w)|y%3;$Tx zD&dWFfC&hbC3Znc57ANtK z#-b@hY9rP2(s~m89%*r};=$GtcbNP}#b?yl34E z73EF7_2=(L9heD#N%WEHs#1>5-bR&iA)^%>0%^tkY%JtVfXFf-Z$kM7wLP`@lfoI0 z6In|EzpHosUZXEgzHl_7f@UzGUxAjK?8-56l{#-bx>Dh(LId7Wflb5C!LHGkikK)6 zVKwz4PW>!EmnUdPgJ_wcDxe%x(~+eSY;Q@`B^G0kHr}65o-hpjQHWKaGN%<#pGj)i z{wKMRV|C8KjBu0!A0P{PxoZi5u-Hc{rar}nyyPCg^8h`suYDaqhjNkC!8r6Q?Y+EGWjDedgLA7;~#L@Ok&rh z$?_sj-F)^z<}bw}G*%6vlwb>MXi~hpF}moq;oP+f{4WdrrGNce^`YvC;Me>Z6mCnB zGj!fsR(x<)(*Ln?c$d}dOa4<8UbAWXrL6OF;<8OQadrU{s{g+Idwp6gaoj<6}&AT{UJ|r=2{`qrwAQd2d$@EBfs8k^G}wJp@F1E8-(mLlCYBQI9iaMo>g} zC2(r^?s|zts?F0ID4E`50cdNIjw(hNeCX^9fmsy|O2D{df1Skf0Hil-sBavd7{$n6 zVNh!WFmxNj1lWg26Q2$zU=4^*zz|BH^@BKy+*v1>-?Wz%>y7;!0!?2}<~OmFJ|z_y zEn3nh93dg8e_vx2d%z*fZFw9{U$P?21F9mp1S4;l@MLZDiNC@vS?V6dc77mhJi$#@ 
zGMW>Rt!8}g`m5S~(<9Vkbo{Ec{H7T07355@s^d{iPG1`1x?&P9$0WW>H<-YWCX$2a zce-vpf9=~^vy%p9>HSL@emeAs>`1$4TK6Ty#0lE{i?W)(m#?og%QN^jC0k3m%>*v-j9NO_AtTP4||2Ug+|F88VfBqYwRj?ZpC5t9X*Ml>vQB6s!ZZjw+nfCH>!4Vwa;4niwQpk+lS`rUbz{|C%ICZWl@GqiKC3;`j|c` z`)QemI+ye;ty9FFZ`#I{dPBnhSdoNFBin<_n>LaWu@B#wYA@KmPvN6%j((i3&VX5R zh~eAvOcRTbfmauh}xG!d(b)QCfL`Y{F$U&fDkl&m@gM9?#-{{NuGwzw4bJE!1h+#K-kTRA$GR@0E)~ z%Wa#k)kU3hR$Q*4-EZ_-j{jW5QVXp)(!!2v8?;WzzJJ%&_0~Yp@|C0``xM@FG2tU` zXPg({@wl*6b}c)X)yg_uqu@LtS&4t~)6p*f&a) zAHVFfJ{EaO#PxcS>88AI94hAxrKJ{aJ))tiqKV-5>7Mk=WS8*pYkIro?=N=4@#=u!D$5Wm6AGx_f&rs)!NO_S};!Cki ziJe77x;rMP-YDI~D`LC|qz{o~Ar+bK(*^B?6U>MEeSh@E94ORj3j#J)`NV{Cit4xH zV_4TkLk5C7ay{k?&5U1~7$VblI)d|4*4a(*K9?8li+43LJ)dy$I!l?%kwp_9^1#`< zn0~dm-YlyB11BUEn-(j%9C=&rezwz*ZR#qKOPjJ6zRa6p%Chi{%6-8-2iF8~E#9TS z%l_hHiezoDzRj^o?^TM#)8;g*l$UE~qP`Y3&>?Yh(9 zKBoeI-iX)44JG>Bcdwnj088G~FS8Zx0(oZPrkeKnbOg_=yFq+699aXoS6e_H5MeGA zf2&Y>#?`QegKCIJ=MCZ`9ZnPniF`KYpW(hVd4QMGbgquGs7i9Wv0-HR!nGWZU;LKL zPFylO{LxQ7=amxj4{oQYNCcl-?UE6=T5s{b^9{wid#l4)qUHY~)=eIm{jFBJ)I#J` zaalZb*{|DJieCY0+4pIWEs2(`mt$|xU-)u^|FpOTN5aEzp2*lU#VXu$vt5|{w5(uG zWXY2z$5gs1%7iO#LE&aD_fK(?aBc6dZkOZ7hK$A!>}}^a0Ol4y9uDL)=`BDfSiIlws;9@GAoy0?+PbE|-9c>{@Q7={Ifv zr$ReIjLbe-?iSrv8x>--SyIjB%;y9A=_0JP(E_`cn7$GZjbJ-7>t&gw<6Goo!_Kld zgapcjOhaY}$O#F^$R~%($r#Hg8(0`jbCU!NW)kl#8&Vj#@z3 zJ$cdgkz<#Sh3}3WyLc+dylmN!W{wHB`w5<`?1rs|4v(isqN}Qc*C>YVZqoyY*6bsD z@@M?04NnQa!izPpxmBS3V5+pR(cFxF>%VC74jHMH()^1uba3$^xBPnrXFugC_gkt` z$XjE;WV0GuY8lUGqP2d^-p$U|b6DF|)N}2e98KMr9a2XxYKocOz8hZj|FQPw@mQ_x z`|yg$(8QJ@C1fZSqLfO>kZ3YCm=y|0q zqV|5ipZCw#=V|Z#ly3LE*0rwdJdg7j%*BpeR7)JLQ#-5@AQXQWZTjdF?f;#RHrx8& zP1)N0>oj`g#R_XBv%?Klzl3q#W?@tIW6w+J@Cvn);(fh% z?fS|?r5-oE${x$JwrD2vbc-C3+;(hud-zfhxeU|L{6Dmh2)mYrb9D8+r}9Lfi>O^R zD@xf|XR2xCxIV7AGw!fWzT3pQoLf61%ONNzB#wXa(rD*@mdBKeuTbVn*v|g>lJV8V zhe9f*=R{xO?6v7p6XS`RJsgieWD` zdJ^>FoVb|r67bPKhI1xr)V)VG!Y&={5>+nF3m@(|Kjq?G`mD22;u+V>5;U}TO1XsJ zJzQ;X_E~HycLKB1LFhvFIh|%n@oSgFl+};cKIY-vCWfO((01*}k;n%w#!I)l5{+rt zCm**+Cq$LqO;p%sdzk9 
z*G%2ycgA^ynk0lRo#8f3gx_X(vaPs`kiDLtc7H|F+Hns-dI`-6mIJq8N%;f;6GjIR za_Yt>7i7d7o*upM2E~naz~oCwr>_Rw*xD|Cc09`k>wD2nEEzLRT|z8R{Z6}fb3mP4 zvPexPAODLz{Y@<5^K3===ema;PP9J}BsWD%Y|)Bgd9?#tU6MQ6!{(qr-Tble-QShC zS;UW4B4AEy`?_DBx%X9WI+R(Nwq*5U4MUNLz~xcL;(N9Y_dF=G8lQ|?@@??VdqLgU z{YUQeX!3k(4}adqtH8^X1V*SJhwvj?iHLKjjxggQtd*R@YHUu}UYS|YsF|;t^<2-k zP|rTZFg#4iB|wN5)pgv$=epA$PQ>Ixc;=c{%RJM+_1m|Whq&2{ZVsp#ZcGb;X)`!T+$sleHn-$3=<7GF#y#9%6G{RmrEQlySKF|`2 zmi9AtQf&EYU-#|1+Fw@LkhcEtyEPg`ao6c0f7j&G+;!?l#E-44C+0M3{5nzlCQq;} zOo2bu;vBPKE~gNqDjB_@Vh(%sYys=T7rZt6bQFY3#wTPvlT_S}s{P@_uw#kh{^{dH z!8TKzVlefSqrvsxQU9=Vwmd%>=@x!$%3{wgPPVr_z~Bi02TKsrO*QyIP_ijJd82j@ z`==wyd7_YlM5U%z^4+_O)?Z#Z6LDBX13-H5M6lQ~UVaZQS3aO~=Yg+QMEuTl1dK$( z3?^FO_VWGhM?oG%Eai04BH-Jgt>wbBHa;;F|=nNv5l137qHj9fl0i{V%}=$4~2X;De-#xv=g7CKnS^(7B) zt(>J1B))Hu?@pi1gIT}5kE`B0@Zz}lPKzz6T{nkr&&rB_QgJ^@tKneJ&MTK|9jE^u z`o8pWt+zwZrO40wt-G{_4<6JzU*FX%+57Qk&VvXI!2&&dR_TKuOW0%qJ-$>H*>bu@ z?Ir3ppJvogHQa8C$dil-%RpwfYrdkXv_g?2Z zOTqA2#xx$iFzper)E7j-5H88Pz3Bx#_l!iJ%znO6V_MCVqvxeE{C=EljrLF5I7`X?BTG|{=lm3~O&9RAFcm#i#a1aGpcnSO`9QsZX|LQ$2f04`o}UYA z?@QHuKb^yIsnY*YVd?RqqMdpgUzXi2-z+*Mc%KeW^wFHRT(T~nOKmnBZ=8Ss1s7Ve z3R&0FRcf+LHYzf!f_ZIAbdN`iv9nsS|K^#lvooq->q%aP=VoeQUn7z(B<#Y#+Z{n| z4CSoZraW4z5Iqrd@QmtM{kf3590L=nDK&3M4cBDG427=Ab~X2s3tkbS8-f*1%q?i~ z0*lJH;W@_+(kc=@kwqmGLqr0CMWY;;-@7U@M~bPS_>Br$vi4)+^K+^=86Vhjd{7qGr0mor{ zgiQYgZ=zWOGAnzqcSl{f0jQ{crJD{=h5=Y|9exO$IpNW~Xm3OCFi z`mQ2%I{$tbEny2`L$^~8>(=;IzvSQb-oa1vVr^Qo#p0eljU~9LId2Pl1YILlHG1^R z`6BjS8H!xmuuEj)crX8%y;98rd3pmkeu}RD+J3C*OXsUMv7E-rZ#7kfK@^Ji8g_5zh*AXPWxx2gSpU~YQqBG&@TfCo<_Sry+4t|>b2Kv z>!b_Q%~>a~|FrC0UOy0%lABYqCEc9qOI=D)!z?W1~B(XD?Z|b?t_3 zkz!->dkyoxMWkFvn1d&R5aq@KKN9r8BvK4oT5RkLvt-HrNGDH7c*sKn`~>oh18PWd zPc)H+N7`e|9!M>b;|N_3xy%j>4pOckL|P+BjvzCTo`^;U*Iz$Zj)wY{18Pd($}u2h z;m5w}jKf(C_8y<+^NY&`U}H>^cDV9#^?1SBunDL&+p?k4$N^nNUULE>b0>q|LSvt| z_KV?gLH2Q&?l#l0gOdyANia^UPwhVdOs6487$Ecnwa_U!AgWPJ2jo51K>N(>gZzqw zKqIk_u>OOH3DP$nSlO=zs7iz}OE5z-?BO}23G^Kg{1LFJ-j{E&(2UiI=&2A|b!Y{g 
zwnOl=5IGT)sYm3}2Z6ij{Q2usALB~E)|3sN_Wl${q~yp2DE0yb$Ivy-CF4_2KGekz zhAE#{+&LL>sRY&&3zAV#=UVmaD=Tr?LGj6l^;+r;u)a+PY!@~MZ^3<;Wi{&>eEkcV=@8196=7Pao{~0G;E6(TK`#i{R zt{*6ow5e{W=aK+xak7c*lkS60yf(X3hW!?tR6hF%BN(sVf8nFYkYZ$MW*(dLKTiBV zvg7Sq*xion{77A^VRJTZwqnTe`!u~b?^=r9n45p_9w|N6YjVzJ2-JQBsW2GhVz4ko z-4=NA`FrTN~UEno0hSdV>5+A7o{9+%o~%jikv`fvM0$YCtE8 zZ9|0gU{KAmFm%LG^x?tp62zs#^dsl)4TSF^c8d$9hS1V+fgR;Xo8i38H|KoqPJ{9& z-cW{O*rAMLt51>G#VUk3roVo|{IBs-*m@faSVu_q49!7^@WDXvYWewH8gbZY&>{RA z{w=H|m0pW+@c9R;PrWG9SKCda+NBAI<|0iTJp~L;b6$tbq0U+XYaF z`=JJZju98(*cKVP`k^}JOxZ?bmEp?I&h*6N26M+huluRDn+LK+Rz z2Hv%w%!}*Cmq$%ulCtI(Hu;m!WOKN?a+Z`+TcT# zrpuv%$DDF1rM6%`vj52V9gbk*LjP9wyB5a^cAR_p1-@YSSYh(P_+YNVpBT^_{`+Ml zl=~fKd=q@c)B`@g4(>u!d2%e20R3uwN}b@HtBM)K@4J$^H53_wNtE#p_{F0Pl~;$mC7;3kR{sk)zs-~r*qjRbFk4VW{$7tHi2NA|Sb~+~2 zdA>p`yK+CM9S*e|C#M>@&Lw&}(s zE;9uT$-=^o0VR)W4IBl+0ds9g;&)yfpdX9@M0|Dt57UUlnejn2tL4`3d4S}{xOMwF zY%n4mCcW~XhCR+bk%+jV$L73+9o6f0zIh9_e$EPl*HiL^7& z=r?6#Zi%QYH(7E|g%%tVzB2fIQjU_-a`n4c1HcMvG5(0Hxf0Uf*y2}+YBP1j39N`9P6czpJKeplAGPXs$sJV=r>{={O**$?#=*=&)C7)+0KlN1}=ZLF$Omdu9dV|KI4fhON;)cLJnrCP;~V5 zmku5i90yTZ?sEjC14|3K{Hygvy`LdtJ8pU&Ib7sdfp(TOl5~Y$-E%a7cSTFpPHc-5 zvJ4TsB`iDF^lo{z?|xqt_j&fpmpoidgiOmNbq7e&(f`hUn|tKp?~R?Eon>SKjW`K1 zH^n|Rj)CRra_CM$(@u5ECd!b4`ji0QDaKVNjh@xiEN|EWdsF)qB|N{3s>jdc%Dn0O zX~_@#XuWKaSqgV(sJdB7R-_CM<_)VOWb{&pw3Jjk3ukrfUHnD%$^|7ob4D*7821ke zW?9NO%e3nV z1!CZ*8UhfagM~BTh6BT#Si{P|z<5W>T7BP@=C|ou2G{MOi)R<`vgUvY0?Rz1(ms7O~*Yu zsO@vz?UU*?sQ$2$U;L?`p}6Im8kr~$iKp?pY9-U(jopzDm&&SGn-9X>!sGZ6h zD{J>DXTDSl!Y&%tEU%}>_Y-g-thkh6H?RmJ32!DhmVQi9vj4_j^b{K#Ydel?6@A zv-H|mU3?6x!|~G6(ljf_#ih&)a=HO|iGme8I_yt+)8d>wS@!JvIUJ5N7Z~5oM2 z-4}SGuNcZKL^in>j4(6K3NH(+tE<~zYWK30rDU*S3N9vn>FsU9@b~K8N@uaQ(6XBE zz#E^Q!~_azsgQ=tBE^|;#TLuNkH`7{^E2z72AS%n>T3pAi3ruWtzrD*osQA`?e@Y8V;&PKI4CNN&e`Wgt-L=qXE#axU z3EvOt9gK$M1*3{M)ShZhy+^0S)M>AQlH3v*UH&2&sxlGcu_Q}kepGN9vL z27WCPuppk#F2q+9s20ISfwA6LPWn?&dm3RX@gm(Etv2~(kkSLBWF#t9n#cx=Z@jeP 
z7&@_Y1D|>*v<5N}G~2~+>~N4@?>yM&NI5Y5clv(8W_!WuGZqskz};-II3ir&7**?nIy$v$GJf5?}^_`>b0zKU9cR@I?4QgwN002AQJ4nG9KkcgT&NVeNBSt9N zlB=j75PKmA&l;HVhN45E2#~KmJsQ9j;7&z4Hh3fp(Qysh6wXCU2Zy8@9EHm%T5=o% z;__FVO~NZ41ILR2{hrNZWj#zbLT0TJL8hNNevttIunlqeWm1>~04P|Le}kB_HRB`< zC`6$TQMJ9&43|fe2_PSn#b50RS$5%cW=o8dhcX4pcos~7u4#F0A z6_V79mr*GZDR-Uk5h=k%j;NoCtwDfA+x_=hkPc1wiK>LX8V^uXa+Z1` z#EwKkhhBi6oH}3?P7&_V4dugk-VEaj-Q$U@IaqiGa0C}QT&^!hfu?(5MxIOlb+Ab}2h?;@L z;KWly7eSOMlEpz)&Xy8;3+-kQazHF;7lEtL%S$R0rbu!HsD%_@1Ukt{nuCS3ZunHJ z@+-{zMiK_$l?jhUK0s`RAk<#bPav8noq)Kl?yVQyf?Eb|-$4`w90<~!>XyOZhn>~| zx*y#P7aNiQ?9}LNquy?`u9t-D~o{LHt`2z+1|Jr&OCW3UL+<#xp z%&@ImVyb*q))EPpRTWbBR+)JNk;3%CqhAp+TFoiV?hhyB+&h%A;A`-HBgGL3vxfiw zzPssj+xI>9zNMb}Len;IipsKcaU+kK%%ZX3>c}UGf4g;PV2Xy#qqg)D-RLE9W0maC&u9`G>-(UF%P%@_XVFjxqW6cU7wj$sdQaOcwlShxzURvBlF*PS8}u2 z=*d7|lT(fo&XCat=ek&# z#Uy4^M|%3+9xaDrkFlG&-v7vmZw-BHNOicuUbq>wV5(SqHD=*ajpc7bjOe3=C6vHl zCAuPRaP|u`wIlOCf3Ujk&FSj{4D{8F=y`l+kgjDM>I`ywGwSgrcQK`Y=Ed~+R z(n{}^)d#>v@Zvq6WF{L~SXVa_VAo>+<+`bt>`phr-GGCeyAV|df=aGA^nd#X|CsR_ zde1#~%pNhF37A+gg5R81dSN{XTE@s%%y*kNcg{SB{<${Q@w%ty0~139gArl8wGNUp z=d43^3NS@oUENC^zn4OG~;JZYa``tYaA~F$A;}1FR-Yri?16yEo zcXe~pH!^I3`4!&Vyyh^WRMFCrTR(F9^;SGvoBvvCu(vwVWOD&ix%tmEJuzECH`)zZ z!6hxeV>ov07e35?7P9y>mV}^t-?ajzTH&J!r3?w zm@N4~V)r{+;<*?M%#SsJS)hx(aX8;xC+xzx>fXr1CAUYa)iBz8<0l@vb-UXT#r~8}>B-10-zie++!| z&H|1gP#r+720T3BWH}jWo#YcpHvurv3o~&Zy<^MHf1TA=Wd7EnqrGSP^qOhItzPs* z8M=`7z^19=F=GCd?jI1KHg!DCeNh}egcGC4xr(VzR~K0rwkNN`mBtgjE1VmR7=sgTw$B{moCE}e{z#y^^~SjNlyaFY4?i*LCuZ|<}oeVXGR(BR(~K7!;FjY9M) zvb<2+JOY7v;qG_xFe6mEZ;m1iW+3AjqeR03fIj;z(6uoPo-wlMBH!C3doG!;g2fgE z%6)KA#<{ucbhVu6XgaLRvCu`jr&?p;4|LU^K#xgl30m5KtgPiE*2Iz7grZBTZPfoL z{w}BoTQm2~Wh|?1+rVt0e8J%7JEb&*qSP_W4i&Z>gArhRWxVduxK-s2gC%#kFSHyO zTm8Y@((hwj90@zFOq~UTi%I@!;~3JQ)4Iiiq%=Op2hSf|v=|)7ny?UYIU#$c7s}@5 zznthdaB#Ed5XP@O^7Dduy{=KBUP3N2MpYBc@ktp7;AIq{{H@ApkPx) zL@qgG21b5EA|ws6;nZfD?al+Y4(61yqXsG)UC_s`IfMiS`G|64R3{lGj07LTVMo#9 za%>%Xk|gfgjE|vA5k5YB3^;K;_wuKW2JSz8fQy7j^pVp|QylXEWF9LAEGFA6@>w-= 
zV7rI@IF=jk{Q-uJmbtVjTVTs%n6elmTj-E7s!=VUz^^6{4A!vmA|u=v1sJfbCd}5! zf`uG8Ktf;}agdy~G6YcHRrn*b)yIuF6AUWD5CIO%k5L_uj4@I2rsnh+AL-x1kDU{G z+ymCfV8s);PnmwahDK34N-M?(;4Hmgls6@A76;jy0QFNY8!&x=+fPsP%$l_a-dGcx ze&`QfSpP!+WBK0d%&Hu&O2c^?ar~XV$6L9D{QmRn+RY1cFp|(uxcjO`k)9g!S$*%S zY+h<1L(ja?yXf)b7(|GXTPuD#97qysDzQ-TV@{H79kwv!cRC&E@4(#1+4v|d3!5S3 zd2F&L#~+-11bigq56*YO7uvywq%8>;+`%0BHBXr4DSM z)3Gj9VEH)Z|^M>TlU${*KNYwQ+HKhra;fZtr`-nwx$ zE9Z@0IinV8MiPKsm%K}vsa$~bMB>@pyJLnBPXmii_BAFfTbWQF4CSwg6<Dh2}5~;p+P#=@Ov!Ocuth9ytnG`P_4B$0vuTwD1TO&o%i== z^AT`+_n`-~sx!TpBa!01y=!du%Epex#vp}J(Dc%U3lkfQO7`M`cyaNq^gyq6ym?K> zfyHH2ZARUEBMVjX#`A_7ULfL74}jf!XkFzX^x|Yih)L>u{0@{w%c-5j^EeSu4{(9i zn84R7p%bQV8ADof1tJ^?bQXB_lMfb6G|+eRHx%d{du{${OM)SLWL8E7+~3SH;UII- z;RmL^;~3Beq>|de%VNyE+}W&eT(?#b!vBj*ssNIXy>?ng!>>!+V<-Gl^r4KL?-yme znj|=yZeNTVoKKJMTkr2=-KL`WOCu$x@4qGIY26#FoG?niJSZc_x@!3#{D#wR`ZwjtaQUb(N(1)ZaidAC+v6!e-!c#{UDuRo4O%0ebD-@S_9D{RbAq+@BK|yV*!_dTF-2(-jKM_}S3VZ{>Cs{FH zrpdei{-2AmC_00C&P_VT2yuWh2nx&}ua!FyQsCdyvyW&h_&_hxe__5~-l>>{g>JIv z-~m|sA`P(^lRK(h&YsH>_Z@{41TE5##&Gl&9ka8*;5EDPKW9tz>J zM6Sb4K8lUBI^7`FZGkA@!K3$vqX7`A|=5RBu#3HS`s zlGmU)0I7&_8EbRBP4@hz;tPL_)FZu1g|m_9#fj$vK5{-6FV4h60cRzKA#dKC9PboI zDrRt(EpcnuGLK&c`-r_}?0AD7v^lnT#DXt!SmhW5+YrmC(^68lkiI_s{*Vk@`(A(kJo}8OftwqsD=9 zDS@Y;^nHwS3QlvqYF!qC(du@?W|*I|mRyiIHD}cdi>LQGo=o{#|3Y)>x90b)qJbV; zO(jocBQtv|ck@X52+asZ0q+r}^%rZ2NX zj>WHs(h^7!P);~#dUc{0+x~5ZEQtHz+Z43Fgf+%D>I}}D$b?+d4-JmQ=!6KB=vAV! 
z`0$MEA>gutMTR9?whZht%R@?>bs`{xa z2vuV!h!X(oI7VsrJMfK`xO;lCG9oMAZfdcYl$IucDDY5U`*VNp>_qgK4`A{YNMi?( z{UZJ1fy3a@Dn3aYsU}8C3Y&(NwI?+8pEIu!s4qCR?rE)78Ql$Q7Mt&>`LKG$u_0s% zcgPziHzv&m#s{myMr>XL{{k-Sy_W&SC4)_s-J=t8vXOjOoYtmCh_*`OS^U*_kOH(r z|8>xezq#zPyRheWxlif)4|q)5^RE0)lkH^V^b|PZq(kWErZBpAlFZH{^Ty}7@^o}e z+ziGyt|{J5I2aj9!OiKz`S)o-sT2INDNPPLfhBALZ@(vuWbBx8P+0W0P2$~=bU<;m zpEsNyo}sE{<=t1cb0jmM2|L)Ebd){Qu}ucT>=--pjr%X)DQLB4&q?gdBW~$^2;x9= z={Lp)u=iQj-aqRW%d&FMrb6c>3v^N)?`rLYF#S)-#y(Bz6vedD2#g#_(RmTC1~~UM z?4XD#Kp@jJh{JpD?L}%fHX-=NzshpkL%l90>qp>>4{`d5(v^b~0cqGOi~RXy=)px5 zRQR9Gn)TWt$-!)zx6G0yW1r0!;EJj?Y@~R|Qvji=wC!SVCTIFw_}(qr<1}#cJ!*EU zL5Rpgpr)aW-hC%C>dsNf$~Mg&JwfQF$(Vgqd3R*d3H|;%%(yc-p6W7&QG-pirOuhs zB6SM*3NkLb&E(MGlIXQ_ImMIocgb1X{Cj;`40hG-pK&4&HX(5?iUT$7=f@zb4T5@q zz!&_eYq9p>QG=FQ=I75m=ijkn*`M!n{2z$wTC)2uY4M0O5680|BG>wy0O zpPuI2opUpESz=|yyL-#(57_S>>yQ@#JbSug^;EoZgYCb6W9~yI9C{0ymEs9)0=2Uk z^b=Usac5tC_39N%xderX!z11aY7ZpSUhX<%(Pva2(UY-5 z23G-F3nbegWf6}(@+BiWQd&9c9&D3SVUt#L8)t1?GxTJTrRSN( zszX5u>h){h`Z*hZ`(6-YT? 
z>_^q`FY%X>LvfW#aBqBjw|VD!`m^g0y~=!(SDPj`>&%xy@J3EOYmutkX-=8;2;hEx zNky0faVX~_hJXh^aO8v!(ogtW6-FL-751*S^rGV89qRoK*M=7^Q9Q}P`Xn>C5#wy){=-DCiJIY1Qe96OL2wiu8cE8Pf-Ak2CT>pVie4W8res@k(&nUvNL>N!Aq?IQ%m||7tytiSHK`(Q zrsOr8TWEFs;l)pN4Fn>Bro)(2P_HAcHY)b*!n(9i#5p zKzsv*=T1rcR;8VD@9gCLWseIQMh`PRyJAe3jLaW${5W>}xH>i+s@cd52&1kCFdif} zEQWTUD!4f~R55)Wu~rsb;eP8&60tlPjP6h`Q?d#se{VXZQ7wbo>1QfijNIaNUl{Hy zNJM3b+Rbk=3%KzbnC23<6-yMqCqjTJc!4xB%qbJXuuNsd=zh|d4isn(5Q(_!2-;868lq;Rmnp=Wp-%TI&X0F_hgY3L#12>ie#PV=L0^WZ zFrQVNt)$MXS9xQqb=0Twe|$xw(iBGa@)y=0WqowaiA9=^Lx_CyZvs33yXDqis3=r% zl#>4@g()y26jRV1@P;vo4|PiPyFkM?TB&3{dgyoW7EnpZ{hu;i2>nB*Jc{qgw@uU5 z;-F(1arKLC57YD)QO;OhBSD{18_qEdmfb=W_{3pCt^W|cJi{R6VKpu)*>sSTW_^9V z#~=W`nLYvqYzzu#S{Eps$yO!D1~v|=00b5yNt@0}RIe2^pt=nbR0i8-5j+za?Ldlr zFe9WcAC6Mv7<9vdfI_Ab91eHR-W%hEu=SHuBMEqbQ8Q7hkmw6Q^*5sWD=dKaE8jmyand0B&@a0fPMs2$ zP|UOrdj5SvVjatNI;*rCF|Qf?vgc}IOiu{^$eoLY$#lG>jK1JF2DKag_&UuUqKaMw zOOp!_<|6UGfBg8-1hT6;8W=R-lSU|DSf67tOy~on2RMRX0s~>b{`%qS_pIiw-L)l} z{(bXOb=PbJFTx+*z?cK_q)oF1mP3$>N!sc!GK!a#UxM*Z><9O0D(vGOr~ z(F}?7W$uUJX-v5>AQ(`bDSaA()a3Gh`2ydXPS_E8N0++TJsV@9ltawT1=5!9`*g)^ zp?MMxe75SmK40&bR~QEKesV~Am*FZ6T1dkAmCZT*^|Ywc4*mAPqpAlJd?3%)mH87jqb z$0Xb?C5`UQi?bu4BUIUf1&XQ6YH`;XLp1ds03vQM+3V;xpW6T z^Yo<Uh@F+GTg#7AKrUI+^JZ1c1zvG! 
zv^8sEUz>IYyyS753Bt`|fb8A1JB^Lc)t8bYMrz{)=x9PBD`H~3Hv+w~bIwYPeik^NDrzF3E9eYy%V7X{bGGcxvq<9rIT-f=sC&qe4FHe9=}p@T%g zgc=6}f~`mTc0$&|;M6$S=26{3RKm>{0QRgysMyUVf!V%%tn3ph*q&}S6+OV7fXVx+ zT~)2WF;(FHAsvdw`gW+Ve8R00P-bOeZrr0^zEzX8!c-m;WSZE!h}#DQm;TycbV`ebGR zev35h%&`GZ=#E`H#Sw@Xaa#c~kiUM$SuG6>L6c=8ttY0yWfhwKO&iY*?G(7SV)@nE zwe@4g!nPfmHN^>Utpp2}qHSTA2pt>ejPvJ(eV+477@_aAmhPNv7Fqe0O*tcLB<2W8 zRO~T|4_>0|;Sm<*HZd_ldBV3Vuj)5uqNHYkB{-cG*Y3*&;jm$04uPm*`V5Y+0Vt%@ zrXDqYvj9j68(3#(&Bx=Bj&9wDZo9%{&p7wJdmBu-3<8y|Y@efh`(U?iUHFVDyUTCD z8f-B>(EV}4#>ziCQ_+`W-<1!!{|YoWnK^G$wn0Y5SS5Ko=*Ur*V5{-(GafuZJ@_7H zFFWf594>HU%m-xo6?kcTK-UBx9C%h3d@`pTGLJ<57dkTFBv#;PfLvf49?Q z4OaZLZ&C&)x=kY2q5L5CVk(!3Hh{{bEM9Lm;uRaQCxgJJMKo((Uj4?EnAMSTVaVhf4(6MO%0;D7+nghc$oO*&wD-QZXvxSCWGS2G2yu8C{Gv_f7TkvB6krCKQ zu$hUf46-l@$xsZzvYeGL2k@F_GnTLzq^w9F*OQQ>Oq($FJxzGR6Ba>C{Byn(`6al- zX$DXQ2mU zq2JhIHsMMN>rmHFPIG6x#7}uY%0dJ0j)LYOO`8JS`v{#_VLMCAmZ)_i0l~-TLKMFT zvH(?2MAd??ma5DGcOBEI{~2DNe-pq)3fOece1Muf6MLqB#B zWdhB|!vupkU5q;@Pn4=foOW?04X|YdZ;ir-;h9R~`GU$y0pQ+`ph6y56JxFonoV6A%TeJ<7>E%G&{8Jt^ltg8QhTe{Kds>U6qvVUqIh1DP4HQ187 z7?Y1>0j%{}StH;AhQY00D{;Zm@Bf4NwO@%)`iC1uKFW$&upP|PZc*R%V+{UP3M2UU z`HpQL4*lvGIAvZFV*mS9UCSgzBSZNW7A1oJ{bkeQHMf0mgSpV?Q$6y}qj4%liPBm< z)zhnf#w2w+{CT6x{>x61S*pjkaBxV;ItTy>AMg6UFO!btp9}*y9Te|85GgTsWPl~b z=Jxf6%?0gWTT&fWBR0p|v^b67IxGHoEYaZ^&$3QG&T6ReqKBi=hfu+``hu~u<=}sb zU3zSN-X9Kuck{|YxT~D?-^TyD?|-1A7_N5Pr?Y*A{7rat^o4$=hVSUdRHY6Nv6{7bZAuv%QoTsXR*|li8;$}4sL+2`s)PdEo zH=-}Sw%aN{=ch<$VI1Y^`Hwl5z}Pw8f^**Hs3^y+Telbnw9)ej0gLf1%QfG&b?aY3 zmSHfQEUe0|sdzZn2R{{Tt{CIa=^c~+v}B0c!~udHq3dDtDd~t)QBuS3C*BBNu){nR zNr2DEvv=gn=KesLh(x?WZ?)9HeqS~5KbWG(!c)2d7-5nPH!xO|UyCS8jBUG}9Vg_! zu91J@xwgql))j_Exq3Ku&SSfhk#qUarNX|Tpdk~1@DX7E@#v6`B2LB)%lY8#TeoN? 
z0;!HzgboEhjedD98f@dH_I55kKC$4KjCJDT zPc^5G=kT@sqCoc_UvmOUKG1nr+QV^{$G)RU{(gVn3MEE`2>crTHvBXGN=*3P0kQ8_ z%+FtrnD8^>p8Ob+TCWY7+SuE{zozBw5&5hUEM=DJc-w7H$@q=wYw2k$f^p!#tH9-p z^{+`Yf(K1flOw^gz1zER)5x_k6#IL_s7VivwF{5>ZF%1OJj_gIdFDEft>T#fS;aF< zdOvW8Trd8V?ycYJxb*4nu?Zb{nT`(J>D@&b@Yt2!`u>C`=N=kKsR(%cWa_mC zV5>UO$pM5r7_bVPN=4`#PqPdJ*M4C(1nhl1#rhB8snNJ}GJdvs6GRlnwNXR7tU!z6+6 ztL1l$K;woyf84yV9yBHi+t%-&AtYq?Q70%qrbftBB=SsQZ*@Rsy8nF}n=yXDwCQoX z{~+@+F=#PZnA+OfiL#qu0YV%P00o8B)zi^ps6lrQPQA%#8(uwSfjFJVF@c^v)!BS6 z1>5^>j6Dd!p??={jQ9iB*2#UJpJX%d5O(d?0a6WtHo(}u`-C9o^aC8m_aGGzL(4Wl zbX#B>b7#xBqTE>GhM0}7UJL7eqr8I*q&w2RKfRiKEbZIcMFtxr{r|(aA@0q8IXA9P z|CE0*YWfL=K%hzROT7qgNvrWrP{b&91=OyiV?;utQNwwf=G_35&R=yAm)VmNx9U;u z^=BTA$u~P1CnZh)IG0vd5C;gGBmaR~g;KDa@tSKKyMaPbcv6Y$4?=;_&XBDI8Ng0T z0Vp{==Q1k`%LrTS*)wk9FHr~+8U+6MOMpa#cUOj9Xl!oQzw$fEWLv@+oe7uZei$6H zpP!Xxoch8zCO>?7SANmMviXanxl09L4*Bn|HFm2kf)$pr{F_{8G!;cgA?k>bYAH!c z$x<7#5GE4_YC}*h9z~I?Hz)$#JjJ^~f0LwZgMF*p>3ngSdq$b9)dEJ)6X%s%_K z@#8!_J^$IPtud`A;i$>L`yVzr_rAQ`(`Z!FK6ZcSO!&8a;35_taVa$P$SM_r&q1&I zRmg0GLGR@x6_jkCBdlaB(G^LhMg%?0+qZ$;wxJ=h@}p?_<2a)RrP1wPBZE>O>7C+B zZyPm4vq4mYtexnF;3I`OTm(#I3BqCK7TRr`qq57sssj*vDBLAOjqspgw?S2~w z6A3A!+{o;y{rZ$3|vkI@0XCB_E!ygRSk~vs7loAuliZ zTJOcSo_HWI`R(gbqC8Iv4qSzV@4PV5{PT6{s1)xTYsbMMB1^s=k^KBMG&aT%Bd zmstPbSB58>MO_2S;zgNC8KOlJUFK8A7lcHNyvH0wgj(~qNWvWdZ^~$dQq~UHM*Y_+ zIrmP8C>abe~tbNm0imGkaK3j8aplPf>kH63mGvS*BsAL9!hK=Ac!GHw6! 
zK*hZNl%+KY-@BWYN>8!`z>*)khw+V|EUV2MaYh`aqG2~H5!`Zm_N~P~L(EM~ihu5E z)2$pm=y`+!@ixLojR?nfJ)F=w@IW+r#2++uzvE_JZuqBI<;INsX$Xj?Ldn@|Y&vj; z{&W2LR`-17bGotewS#b5y@(Tk#8|Bn_6^h5YvILLRJD8mRCyYPQ%e{VecG%$IFT#dkbJ!G)v^uyIy5)LK^n;yL*_GP>@dU$K|t{$$7((< z|A)0UygU({+$gY4OKnJX3`T*)Hb#^cmzjZkIES43_y&kbKp|o30>^GT_370fE8G7Ox#TKK@!}o}M(;IRtT$ zm9NgW+#n0{pMPE+Oyd)1;hORE0bBxp>;C@465Y6sGf|ZkAhR4Hb?!sIlo7N+4K%6j zX&3_}VPt$Fvf^EB;A#!SL}F?0Czdu`m@d2jsq%t?v25@YbN>4iY08E}rVw$Qh_+M3 zM2XZ5T?fE4D}qk%7NADLc__#Td;9S`OYsyMH7o-qIjv;yQ`H@50EKqG_An4!i8ija;K23cGG^z;vq*VP zP?wR<9+|Tam)3g+Y8kb5&dQTkxz5XL&wI}2_3|m91W-{g>*V zxJEy}=(<8+Sc%(UYwR3VA%HXVRq0^#QzUuAaJhWe^2GFZ_M)h84*a(BWG+r;UW*BBp@%u|<>m;BQzY}?|c)?*zZD<#Q-@4FDF#$x(ZMswU# zdJ*PpAV%DFz*eery|b)_P(&ncFAC0VIcvfYd+pa-pzBlbl1>J`aA=MvjHPx z@`oze4gDqCa0cXQaO^OG>-o=5*G{h2;bguPH93Duf292MOpfKp9o-f2?^fQf+ASr; zikF}dtaHIAm?fNtUh#@E6yU+3p-Ru;?UewhzdFyZoYU7)FDGFX0#ljGLOkR0JLhk# zOH6K^B4q0)Q+uO*+_d#NlWd=1rUDcVnVdS;KCvtJlS@`cKO~OSx2V5z&w$@bp zHL%MM&P(?d&vX-fx!m<*2euyo!=75zOn4Y;4SuThfYp4N`mL zpF43UrS>)Tj(*he8ME%N=6l@K=evT33}2NW6XGtt2B#$5?u`_jZnUwVc!0NI2yY}` zSDJ<4=XInNizqIf%S6p*0)L?w&K19hKb06YprvtGoJih-Y*) z>+s{{PW*kjE1xhD;}Z~YD|!!};ue+3%;w0`o*QRf7^s&|;?Z4wcwR&Obq%}?)hFEw z%bA-6f3FynBb;YKz2icD%`e<-xZC1~$G0Ur@Qt@*ha3eeU;O<(XK%_UC0P>6wWlv% z%J$4S|Ki46gBuT)3NcX*u8+rgU(S3X-@#wD{)M`e{9Iadjnnyfoz4Eel0}i{&LyJv z>j35UY+=)b_6$B=CR{0_Z$-}8rIqsB-zLBM`YCJlHawW@Kc4SAK440<`u>X^+ITs} zSr~JUn-wW$NvQkV!n8#6&@1uHn6dl7e8!nuabs#l)6F*lRjXv&43rzDp5di^Bt>0p zT+#N}7@wp*-DdYVa{+pxTGEHma}L^@t_I_?O8L^o#7&##D?V?JIHs2ue|GdDn?8H? znJMbUgPnxF6X@kxAgUj_x>E6<_W3^Js(~p^tK4vTr5Q#;y3WqGa2me_+L~;UHtTVPTcrlO0===QJj&&>P85{NCMNe%odpHl?qJ*DqaWu`RUL z7YIdL(Yr(6?;ZS{YJ(5qBmV8(;cjo>r9JSDm?JK(cE90J7LJu0IDx$((}j~{G)TB! 
zRp)kII=BWcPX(H$w=h*o-&LlDb9)Qeb$3ArR_l*m?v_FZQkWhm0=sUn&A5Zs;tJ%= z?HR3?<~40;J9Kj^pL4kgy*o=IO!VL2AblP>j&YJ)>3FjFP5*RTH?4I2oGXH3*S9sz z|0n?2c1YR)CY%AT5C=5c4kzGGOgkdZpHB)D@mPEC;6b+5ROpug-fzxuX}OCizXe-w zsbZ{Mj=ttYXJ;}pj^-^`@Zy%1y?r8xMKQmAd126|g|qsc;?B?^#7wsb2s^!kB!+L+ ztgdPSCxvDs z7fk6zZkbH?y3-L*_R%Tn~T0c2GFC_!#FfjT#*pOej9G!OXd*Hz4 zDmns~+A_>JTRJ+Dz+X`Vd|qT!`l7VdO;3h#&QTXQ({oTi+vFz3VRh6w5X0qqo7hEyAE{(#&OslLOVk6#4IStBhUw+cDSh)`4s8 zEwklL1b5Cm|H5{=0==0TA{J|jB6k;pFB@B1dlYKbS#GJQJ#VJw{?q}<vKuT++19zC190gTInz+8Sk_*Wl5gFW|sh#7`35(u|vwOLu@DhT<-&*M}V1j zNrA;jztgl{bHlQ}i&mSTq}~)s*cUkZEOaC1#HuKG+Ue_%XLi=-m_UGGgvK6Hv86c! z&BvwU6mhZlTF9&8^IER@mSmGW6#CqIP%9(L;RbLZ?2@G&2RQDYV(ORK;cK3{bYhB<3n9Z9WbEsx-AqYg=2H z1q(J{GgvT3074#_`SU*~cF4X!)zbd9($ePNLS_}j|2p^U4xD$&>QutB{;bJ4Y9=RR zDut5Z0ZM{TCx;sZEW69Q7v|X9san6D9X~#*^!=06SXim)Y-VP z0sxN@Mo@)6a52tEEZ+U@mHeK!@nIB7gJLTIfM>vtjEs!xJ3CK;DQt-6@wCU>v)dzL z7#phC7UH;bsstO~% z=-xf7Ji4oYDRgmLd+Iul`*o*rxXjgtmxf{BOTAYY2O*v$^~6+keYr(=X5#E_3-1G-Glg{^#Q25*`(`Rerd86$m>@%Vn9R zFQRKF=D2wsv)cS3FPq&kNO80zwq0@i)GZXf*whUVWYx)+zIaiNt}vB0GmuU)TQ7NX zg<`i^e;SOTr8Ad0U$M-^8Q=|N4ebPwzfG$2{B$*tZ~F8^98vbjtY+(m!G{Cp+Udx?{d>^uWcWETKeqtk{fKeqQTE+b8`aa(#3?( z@TwZy{x7xLrmXnt^Ci1`qfxT4(I%DNaD++IR#)`s5#P*URL&_2vT-+PVH7;i~7_WJdb($Y=X@(u7ddfpmrYQu5+VE2d|6~J7F z-;|ZFzPCJUX>EF#mwmfJNSFUG6<*quWL887+R^#4V{Qcv1u$I_ZpsJGXeUU0r56pxD6eom-5z{cR^@ z@8b-?dMmyWuKS2|r6B_irO;{FEH^%GoNxQymZ;*}@vR%Ty zxvWkvd9*lqAcw|R=08nX4p^tgx2=z%{D`4D1k~g${vC1gqS>b(+qP}1zGdY{gxAHp zUa5kVdgrXHSt1aFs-h=mdq9>teI4k$Y2VfWu4yvyxcbZB%|t_R#pZEy0+yV7x#g@% zT%*JJ-5zQ7s}9x-7|p{c{(4yXci1G**Q(QY1fp+=Kyid#Qd3hi0i4D}+qUkV&d`EBzQ4m?I^%2D_>eM05X-vG)LrD$CY}%ha|3 z?N)*qux&sQ6fl5+AdQUzDj-OdXg~o)lpGXV1zQYg0g{svBr7F3Dk4e{C5nOql8Ydc z^Y`wm0-d?{zt7JzK0PyyRi{p!z4uygT1c$NE*%4boFJNJ)CkaDJ77|_7=9Tq?Ki3F z-U5lr>OGgjfkZue{aPNq*a4jH4}5A`)*pZrPmNlM{4LqUDm+?~h^-<@R&oY!7pH8} z3QEz8EgxECe3|!xdj20*dKck?u)o^udYuRsA(W{CUvC95gHSNn%9W4MzSF~Q@Tfv6 z74%fB?)q$#)$q|v#6TYfY&FRohu!u7bRE@Y`=;Z7>+0E<<1DR0Fd)jyBgpPg#jbo< 
zzDp9QP!040Y&0m8&*DnSwrmva>nlK-ap3_SpH|$>nrF7KZY>U4FKMSjR-mF{k(gfL z7E~|B2VE9kOdUS-KuR+)$oHhLtE2b(YjaXFCWrM+8q9mS(sq3o5m-F)N7+6Xbt6Ip zGe6KV_!;{|A~HBkA-l%O_I+?r9krV^urOHIW#Ms91ag9UiAdN<0RXAPeWKf}zf`={ z0R?dtfNc6r$}Yp%Lq#AXFHeRIgc-;jFFw-uVj1C8G_|yRi+n_SpW4cfGOHz;nf}bTdxf3L`wYNdYybV5YwyR-AIRkv%nx;Cpn|b{$J+@K|%4j^Cpn} z`0sPhLCR7nz(Z5S78Tr?zMEIPmBSF~=`_;~Kmj4@9dzvjvpt>C%Qv^@M~ zqa6;q0T_SjDWA+M@E{Fh)SyZ8>-$Q@6VBcBnW^taBP2Hcec-@@51-#QZ{E|$Q`b0o ziI4uwc$4||68~Rn-c71s@GLdnv6*A>*JZt2cFShI9r90G=Um;g`rpIp0(@jnHWXRq zCq1NWN>godCEQ>q0tbkJ_@_{+uF5GRdm!Yid5Jg+`X2#(08w0hBjN%{_ez8KaZ@P zf+-@5LZLx~J@cLgk}2p8ttVwBD4&+K4;cP9>!qLa?yG-Wd>1Kn+(-4~|XQzvwBNFLg(9e<*QgSZLI61f#5Og{0P#5B<43 zvOnBT0t3^@w23czur(I4n#_#?E`N@;zf_HAKE2AA_k=_A?bKXmbzIi;OqM9y$#FC} zMo)Abh2n^mQ0k9W{(`Ey0x(EWGXG)20&atf>;U49ERk&|E&b@I3Fqh zBp4w~aWElmG9Kthj$yEmPZWgdy+x>L)Rd+8QGohn{X(fR|KfUbY;2&+P5aZkfXWS7 z|FACS>({s?+3?x;OF5Uc}C^5VN_V_jP64YvF^>OTM%|;yN+@+L-3&3sEBF+ZLtF{V#9~ zRl%hM|mpAubnMgw#FCXGxdZ-Q0B}ex=#deWzlOkFOzg`uRIZ=n4 z#o?rbRE;j1Odh9if1R&OJBsLnQ(<&mLIiTx%+OGi5*gn}OG`@xQpe8X8~HxmR^988 z>F1NAkhxT9FY&~z@9yWs1MnOgO|`Yf80;N#J_E2Vjdq^<#z4KF z!BUz;vA9O@yS>a>N;05g@gf+|-k~a~M8bW&&8PGJqDEKoX)58#$hU`dQ4^~>2yf%{ z<82PON;w!piaCZ8mjg{U;&G!9*Gdh)etmvvkc1}Y%R4mRKUfKXiz{k@i)5E&a-1*I{4LzeKVJKu7t916j)4E=bX(X-Cy` z4#%mv)vol;e4Mr3){#-x7n`_VM@fN-?GT2H90hs}L@E*?`K#XUvF`LDCA<_;f8T!O z**BoNiVJ7|c3umfJD3K;awU7Qbc=xb{Pia#&1PRz zZ-x6aJvZ$yua8%Wruj4Jj3pO4*R9~|K$k|)XyS-~tW37-Y3(`EebZ9i0)8}e>HU>rT$I$-KKRn!ONa)b= zeG^aM#NbyzIie})BP-isH2ZC)B{@&h4agOcUIzXPdaz^9Q9N|~3aF6AS_3Ws_Y*;6 z41Q~R`<|_o38T*F5UVY1w~dAld-E(FUc@|R47Vx?@kt_=$$yV!B#M&6g*{*D-D@4TUQD>OZgjzp3-$nhbG|3pGxyRboKYMrpo2(+;#}>3%jsDz z=PVFqRP}$bt={<>aHoZO@!OutSGT_`rHvP-s&{YqW_Ao(o)w)r(2>@_6%%N^$L!laYGPmE4ff~RoZ~&6Z7T9166y{Yz!Dnb7J$>3r;*I{C zT^lleYj<17kQNYkr4kvFL6Y{Wq@aQaC>A@m3BDZC2I|??N1(N#eVr>1(X}j7|4^V% z=9IvZ(ORdS+ao=&x!!z3b}#xRoef5Cfpk_xd7y`9=jM(^i7&W&_XlmC8$U0cFKCdx zfJ|bkm|&FfYWO3Ut(T62mqd{YLxU}WM{}pfVo@lcy`)2lsJgl%AbCgszeeKEn!4TD 
z{@u<1MN}5=j=%Q}7A~MmXVFNnMkIE8lWHC|Ce7}!45e&7a#3k_FnU8_SG|=d5x2MZ zp^^U!ThlR=S39BIzl_jKVWnR~pGgGR>i6;-Kj&L8cF@$KU>Wl?P+TZ-M(-@ZR@0h< zAiRJ)cNqH<)tTzbMLkIRvW_)<>ivtM!9j06loA{zKr%SExT25=Wst%{QT*6^@{m`s z^K;QP+@Cb3^mArj7mxFwyu2LT+#PYn&+lme#Djo3@d^q$KcYj0D?wOd7=49s@d66< z+1Cl9(tWfyqV!`a_jO#Ww=ve9iUaNfk?O>2(_Ve6S^k9QCoVu82rpa9HJtS19wCHb z0SyoX068IX@#@?EQWT>l59Yku^mbcGk=-agy*-$9bzHbQ{G}*V8663kJ>@tyf~>XE zxPN|WINqM2TD__&i!mPLyN0;26V+1{sa}w95D^tc$3Z2&XRAwjb8N*vdg`rBz{4Ecvkq3etJ4x_v;hKUH3hA}qfAS2z4bF~a0GSHAgbOq#_RT4smRJ+C8QgcWDHar zgltk+G{@_}N{_9T3mj97#067AoX&k_-Np5|i+HZP;toB(W8XQ;L`W*2$=hq&bIxWd zCE{Jiz_e*?ZIGLG;-#&Xd}$f1t9yg1!{XUXSG719JTl83D3eh=s0$zKreBIdAVuv( zir4nqA>HhYR>4M`t$GK_{ApfQD;wVT~)cf7Axuio^5Ntbr0cgf148#f4PenHQp{ea-Y(fbP$ zlQ(WX_9g=C?OD4le-G~0JM7kjz+!y8R>+v#^YxC~9Hp_^_|DasXw7s^)?G?H!NtI? z(4BK;s#uH-nA};lO|~kwzb>Wxry^q?2KXW4Ibi3h*T3PFC73eNfcegZj@)4vA;4`lP8ZZqB|0`+#rQhv|)aqIc9J5j}d3fnwUZ%TXDudNl~adiW7l=E)ZTI)X5iW(1+Nc%y0^!oE&LcGdj|a zqCx{XRpj={9Xx1IR{c%%2cBQN;Fn+Y)!sK?X$u}tiCeN_j==e;%a{C=+9KB&Pv7o| zFzgljf_yQ;s%Iw1zTn5~9*5BupF!--LOXV-k?!d7#c&?CfUSoQemeqb+DK7wZ= z3Aqw5G63;CY^y|N;Pu&s`b=oFr-ZZnwd6TpwuBFVRn095VCZ-9LdY7e{!zxKiNi!w zNdme-XM?9ZOtr##CoMaO3IZFVx;(73C{MBc`}V=NPvzXSpBe07i?_>U)|L|nbsOd= zo%vBm+TgEEPqgg=XCjI&>H$GpWDyPS(q*gY3IKp_^@BM^+cM2AeF z20T90GYJj}rN)*Zc{Qx|Zqi#HJn~#Lb3^FEp2T-=nNLna_cq4jwRv)WeF(YPE_P#e^6C;~dL;Z~x(w904OwcF7wjdZL3q3s~R)rpfQlQFAt z!TkBY>lV7motMO1fZ%#_-+J|gbRcWYdeSAgsU>a+c!*f2>l~ofCHQ5%RmBmX`pBo~ zm2{pM{$pGg+#Pq`XoV2RtVBQ^XQz333n!!&iiht?DJSy36)Ta=o;P^Tn}@k8TWm!Y zN#?sevuKpH?kJZdI5&X8({0FgSKC!*agFc_vI!JrqvToCC)V^`=s!STppnRX5xFT| zRn{iSXvoUD$Nd`{vr%v{YXp{rf<_&rQ7IY*4#8t-P4?K#)N!(sqgVUs4?ze6$J3@A z2-tN&Tw*b0$=(N-4}=8in)_CXtc;bupUM=$RXG+)j)oP%TJ0^eKB z6y8L`R{nqkn%$d30xG7e7n2Qc5gIrmO8~3jY_AV@GEhZe4!R}oa&wgH7%*PRF-%;l zJ(p74iWdTSUV(ZyA+6e&wpm1^*S&B~Ls_-n#1XTy?j0pPH%;RMivDKq2Qo)d%aKG> zqHW{m=GNS@7Go!MfHb~C0&Hj8`Nk8$6SK7d4J~oPJK4FE-4$|59^`K*FJUiD#9>0Gl}T3s&J$E1YDW-%!hkEJCcfqSBg#J2 z`Z^z-sisu1>c4U%x7AWjiKpVwwM?Oo_TSb2jgEX;!6}qPwG83zGI1P;Uej864)PH) 
zPXdK@an~yT9#<~?w5F%W&C}(|r+fa;_63>(Ra7?Li8&9xTI?RsY0_<>dt}}j`H&?B z*g02|5Y&>l84$>CzlGwcQXzO1Ij$gEsO`4FSxmrpHK2My>FZ{JEdlLd4R%q>MKXT)`=lQwfusw6Q__RweWCxrv-42M674%>pl>0-k`GZ zO#1|x(P~LAzf$$VZ9s1AaEn7;6PRJuajBpm+uBaV6y?JfYcz(5Q&l+Z%|4@sbZ~vU zoJaC8Iwi2DE&hq8-B9Gvc=PT|?i#<|8XmW+L}x@pGdpXs`SfSE(UIw&C@MmN{7h2? zX~@IKt4^X9z`3Af(;d`ZyePthRl+-qZ?ex93-Y-nsgO9}Agi!w)3Gqq)7eQ4`{gvE z8!ER1r`+)AIvT1r9) z!7Bf!3Nfww4?gWEhIUzHEqO|Lw>IN!cE}7pNt#h3XCFFQSxl z|5bp+>&aZyio}&a?;cXA7mdpu#Wf6zu$H% z1as_V0JBlKxDP1n>eg{24nxDBNCo)+{6x#5B}>%c_QTseLY>G?U+uzi7;2`spJ+>I zTqnniYHTh$-y~Ido)d5;Ou>z-lw7~UnI$@KX?k)PI_;~UdXOMwiV*WoFn9!ba&~qG z;_|HF1NxfBmESbue9yFx~+HLO1e{T4cev z?tlap_KpBF84EI}AmbIHOR(2Pqec|mxwG=kqB+ApW@Qy-H!ay{ zphwwm^%yZvzPGL5hyXW|^!aj^IrB(QT7R){9TQks+$;GE2@gOO?y$O8m9-Cu+}vUu zX7hT;Irc+ul4G7TYJ3^gyJ=lo-|5`t2x|z#lSl-IQb>N^$Zs8d$ir;p)0Zims(wZv zfXWErO1ny6T8{@et3@4nxl-BduzX(W46tjEn-6?;uPpD#$!kT*fH2k(n;Y8weTC%F1eFK0Wg9ww}U}^8JT~GHR#_x z9r8YkXe|bdUOv8X>JXD|7FdL539+<#00h1B8&$y59)rby!NP?S^zRmytkZ-2Mg%Eh zzqT*(m}0OxE;KaD2Z5UAnsWa|1uuj6(B_B7_|HJLmGr3_*~q+)0ox}@AI4&e-EA%d#0 zGKZF_5@O&LO7ra=j69Rl3CgJIxMV*!ra05_ZN}ay0%0egb9;c4x$vN7w zAfCW@1Y~?nIUjJz(lXIb2?-*&)nt-Xx+pir)^fwX;}_NjUGVkpU-aJ7JJX-h3Cbv- zYzjk0ji6fOZJO}H<=BuIWU7`2$E~@-pYrGJdzo6qLs3~lMk7La(a9(@_8>szn1e$` zPxi1r*T#7b=2MoY=}N7EVkK?|zeK34FUxf#HLFW8C^=DtN8k=CA{yjv249`8Hi@Fv*4m7Az!@aQBE`g7N3yZuyhEM|kjB`B!$SUWE{n@L;!#$=9f zOGAO0|3$&iqM3WrBB8BfnmAlmv|$MXWK|^9>-p6BNB|qeUQshJA#cYM%WOvS93;mG z?`d?O(ZB@@i;AjX9xqV|6Mgqt96{7@l)0v0q^RKp)gy`80GUc|FV5p5U=ROW_28}r zJJu7n;L5}@qP2e_ie-iKm|sCrUj7|UHDNHy>Tz0N$9=wBe{M|cz{&ph<7(kkEW%pL zqmWU4ZebW5@&vJ@o*}5K8(JDj9V-A#ol5;gNIh}^XvRe){-mtjs!$)Zwe3#s^0g}4 zl*Hx+jrabG_omd{!l}qOd4sS^Ly>z%kH`E4S_94QIg}?n9t_G;E1VpUE*zj@)vdW?1YVBkM8!Hhod8&fF%0 z1qMCp?q0zs-bk6N)t@l<%zWJt?i_P(7Rg)F$04_ z(XbuuXLpJH*39C1Bm9`PhE^$v_y_{70h}E}9+DNpN@%=uELJ-1@aPO^9(2?+~ z1D&J-0+3j8cX6I;E8m58pO+kDqa75i4-=emmo%6b2=ig|)e%5)W01kAV`LanON$3| z`&Wam#?HlIYo_&+G|G7&-4-VgK|c|m90!h&C~?M6HxO+>2gJOZ1X4zC-5KGdzgI`?Ko@0MEXDq}rf{}?&>t<2Lv^p@sDF2Kn9A@<7J 
zLDQvyOF4^l_0@Z=9s}Qa1l$VUcQmkv)e<&w*vy>^lA$vtcbRa!WM}XGsE(d6xAO{i zd^My5A^18BDZa#@0mGohP_ZD#4@6M&0hRiYv_+`5HE`b$X|xK5iZV8Y3W!E7(7w(Q z_-jTK>1wy|hbYo8^2C=7WR^$dq;)>#7!1IJafoW3(Xp}Kx<-#wGsI~H5ui34>)yoD z14-waYQnUk7fiy8c}S2q5p@VkF6@MU_!b|b-q%a1f%Cx{Ed6WqRAR2efZs|i6-Yc~ z2yqTGzE#1Yl=(k>Rl>Syp^#CU;T3e!FGYYGLxFb;Gbg3K<;}~0l!asE%9A#SDg0fx zDq^h#gx-icCSB5$8*^hG4laI@-^0$szfBYb1Mt(eM+TS+`i7w0fheT{O%*ZOVOY5K zfR@KBOgE`N}Wqarp20IXXCc;!`H7jx`1dNE%Cqi#F{$D6jio_&&4(jeq3 zMwNs5;XODMuY31Gq51UOK3Apa@a_y5?V){F)}Qe_BJyg;JGGS8c>xhY<}KPXTpE{kHikhse;HpuVM}*p$ z=QoCGiW)5-V)EA7V1$|J)NqhE#V~VFp;Z@9aDDJQ=>p_YeiD@|s&|B^J;XD#SxRa$ zhE7^43}@iy%ZKpDO{Pu)!*OW(%r4sG(G|QQWZ9dP~1iKiLRAk zxWHbL1|y^I)tW<+^C0<$!@c<5#iL(tjI>;3nvS-#B67h2V@Hz_YNs$g&U<|yK~e_mEX$&Cbwt=mmi_@O z{dNltDlk*XCKuGG4F4OzL~8PabTv1GA)(J7>8!LQT^|g4%XI)}-2yNPl1Q@A8XwFk z`Kcie7y8P6sJ!Errqq?jQ~uSG6H$e0vc`SOh3tj&Gz~P_gEMCCFU=&KIC!M>!-#`t z!qN5jFSiN`hnoibuJ;yDvm{>N0V5jBh{(tyeE+te?W2es2;1Z#Dq(ZLD0JY6t0bp8 zm^pIL!u)jQ=IOZ&I?UQkEtoggu$5tD+B9QjqVzOnEMC;pgivPUxdV3!NH$2bXc_N1!%}Q6H4~0s!j?V3Wj$?s#Ke|-rlnIAH%2OW)ch11nUXn9OVlUU zy`S|^+^&ZC0m+(=!LV^Sn8QIgHCpCu3VlE)lHagbM&;$1Ij;SKGCY)Dd;VkK#XM2R zC$f(9ziGW^_ONw#d}`+;XyPvd|041&vTb5h&*};UJghLC>3zy*`;Hw%?v2cJPOl|! 
zHFI!s+EiKlw4yS|?x|_)_^}r_ccYGl|FCcXkw4@rG`C>$!kQ)=98rHjBw2+Fm*yAY z6h*}YQ*uWn$6!x-zf0FcW2Oe|dHra~B6y)R(V+~0!HR(ebBxG}+q)ZZ=JKL{C;DUD zP@i>Q(0A}f1}>?Rfly(tzg3HIpzVNPneE17cd9z@2#Vu_wQ1C0TKd=2#aYJK5F z>3kPhFHAG^4?&>gl0|Dakdx(SF6n2^1r ztr6mCB5VgED)RgkMb`*gQ2(H;x1wfiG&ECPtC$=&#TJ}2s^c2u#|Qbf_@n2g=ZyD2 z$37VjkeIk!fVE_H`s02mP>G;n-oh!nlzNC`1~8CGKvGf{O|4j*{SRl%>Zz+=!B4E- zb^^OD5>nneX!8`}Xd&f0el+g;`DuUi&42hS8=JbZvGF3lxebB<$X@Z6+}FE%)i1a7 zAWYf918*b9Z!7MCfttM#l`lE|4^bib7{peU{jQ%pps36hRMD-I{@V;2EpUdXJ!xN; z!8URhB?ok2GsYM?CPD0kbd*ri*yTeY=SM>3o_Lr@QTh>5iU|C_M`UbekUTe56iN(c zkP9$;fPnaQkk`DXL&O*U`0?Wv^7Hf5TO-a@1~0kkR=fXar<27Q9&LHd=sWj(`f{|d ziZ7D*BSP=}p+Gu8%#(>jO1uo@qCPlk$oB`W>0>asH%p zDFAm74S~5CFl%D%oSN))hB>!8sUO+jgLsMvVn(cIq%j8rl`bvnLQfI<8k!@~f6J&k z{oKm>)jBLHuuM9~b; zWemp~0X)xRhB&bt0gojcI{u{zbE)i*5O+D@>-S7&Ax(+f7);VbfiEh|0^u7wxaA)` z9x94m1WG2z1JNlXmXuL6MWgUU?$Lcb>PM7*ZEy%I+7s)cEDSsOe2w=~Gya`CPZe^p zO|zPqI&HA>=DCwLw38Hzq(uk01d4;zz9k5u05amvkGO;;VOV%-cj*ZICbvJfbF?nzyH9HcG=8);N;l2_L3FlP3x!T%l< z&ud}s)^?Hhxse9_>6eX)Pzohfqp%D-DIVm24mGwcS1QqcFccHKwc zz5t^k8CRPa4y~DY;6_}MqFquNdVh1hSD@otQAg=WpCY?)CW+}1_2{0<3Z?`g zZ*2A?3{9JIY)s49J4rLD-aBxhp6ByB<~@jWK{yAie=?|YQXr7+f(3UhnYSNJHEarYKcVlljqd0CT^l> ze3hvk?DJrKx>(2?C<5Mb8Z3vu4>9M!vbnYTBh<`q-k4C>HBEc$vmKqD8gbIsM?dF~ zVA9Pl-YW7Giq~F^EsN7k0d}|?vgxUdMitI!awos>Ua9e;dz7^MZwk5Sx>Iub=T0F< z!f}+<$1(q^pvKmjzX}0aG|=sUb@TG9KU|r5!Wme5&KI&4t$0~F>+Pz@+b!R@$$tOy z>7f4)>SuCGa$&=nt=deJgUcB+ba9bNL8dmhPrNB9fhMN^8SHlFAr1!^r=p~ESN{r< z$dF{Mspr`BVH+g(2Z-bV?fO#=!eheBrSRm>#fvBn&$pog~okP)c%; zbu|sk$io0W?J%;RALHZ~G%)y*tNZ`MC%K>yKH>p1;qp}JiEsM>%yX^9oONH+1&;v- znE_@|aODDw*qB#Co*pw|l{j}O2<-kKCtk{pg)=m~WG$Q(jM6Qw>*-R! zb}EnpTMu<-e(M4)(K9j5T)z&aEdKU zFlX+y8V5Gap-A52C#N5aqol|j1;u?NXm3^5Dq$E;rls?sN;C;86e+ds{%Ead=0f#4 zUsgpw*v4(#`1vn~jTtPVlU9&NEe-^^w)YIk@fIrqNjHE*AgEedHBKj3nFQavXQ5{{ zrT9PWga*Y+8FoT}Drx28j~>;YSxO5t>h|`rIl4LHtZIzV*U7678~*FhH?QGCSFYq_ zW#5{%+eM!2eT(3l054LG{3aSJv@! 
z6pNnL9lPwE^ISBRZXg$QZH*rj{Wp}kd<1G$T z0)HXDi4qaa(9_qE{mk$@{Nq`HnS1`I^e`2|q%Dq!K5fm+-P1kF=&gblX4~dKO#H7~tYcxX9p5{nX`Sc?7I^7!D5N zId#Ca+$F8>OOGKJCV>bV2P@S8U~JtD9x;lvm<%3LV8;$x!w0lwa7NlhgNPyGWxD>a zifqht)=R|kc}Y!c{QYg0Pt8O!gzjuyw#X$Q3AfJaw@&>Z-zn77=Bd+z3Yw%1L<>bw z?HU5d5+61h*SoSpFMz}w!5T;nNArxW3HTjJFQ6h9l)p4&-G<5Y7iQ8(Wjlf6^dQG{oxw!$(h4x#CKMKs;ygsGP)h zk5$I|1wSXm%sI-Gm#YXK#qGVvxV`VFw7r)v^l|=jXi{?`wxKv%@qW2lpjXk3p>GP0 z9qt5pRb38=6B;o!xEp@GL?}dmOK|E!4`u8zH3a%1_5s3s0VYxctG-JsIB_p2s)Z-mSb9Yrk765}-4KY4}OEkrFW%oOGsxY)*r)i}?{>in%i+=r;L~#tD z{lP0~6NMf`hw$tFkqx57{ch_SzTE~o`jMZdQa*^@>Jy3DFjINRn86Bvo=ffRBeLyJ+$8(u#|SUk*#WD*GA^KE>N1be{RFZ z*}ASl2WPbs&gvZ!zZi1c-*rMf!Aai*4x=W29~NQyQw0XeU-`+>;$oj8JxLofkTmj@ z(od%ej<^P_B=QKEXw;mdEDj3)nLs^l*%`LvR$+7$Ux&8!)*U5frl5Tl6JgrEG< zaaHP*s3Q#eqaU-F;28zyYy}v8R>BlTrcpfbQ{wuv9?=qq2WDZB&&8NT3uPLaZh7-H z1es+S-q#}ioMQrPE@r;24huW|1ShtTsz1*OFUUpHnJH1hs~%C-iFnz?p5NssSkKY8 zhP4Nd%){#u(CKzpJ$|tl-mN>pH5WGDkv$=!~(^O%73l> zf*m5Y11HYyv#%ZTUQceuaGvrems?uN0u48r-rj}|NY1Ov+NR8W>yPK$F&E4)dq;k; zZXw=YPI8h}S)fLZ%0v+0%m(Z5@DdmEi^=0aajE;#lgTZUzpC{-S9$7jc-_wYb9PqB z3mg~vGQJo}0*ds8KYxlb%3DnIb|RkDt5|xwJIp zeeA=?ziU{HZHD<%SGnsAeelbzLN;2VkhLm*ZW6QPb1}n%&HNRM{cqCS1{(X;FQu)X z!!aP^I>h>whhekV8>VoOt20ZgSac1f4msi%n3pT_p0omtpSqGr%iN9|v`=g_p5J%H zYRey)`IR4+BF_}|?kJJXg0R4rJWTOH+sBaiQ zE&;nCxt-pVUMCu>x$yu(Bbcgd3Og1I^H=iNh?)MPRZh}6#uK}9Z%Rnk`H9u2B%pR` z+{8WeYjKKS9-&!hWTt7<&t#_M4bFaQ-!9oJDW4AqGL;dqr7{~JO2Uu#Iu#PEo1m6% z;#O<9&31F;rgsdV9;IPE)+O_2<>ZwOzH22lz9e-@xlL&x_#-sIhBS zvX&+o+t?$eY$y5FrfZ}G*L-?dl@rv}L%??pl#Q#~%9fe2D4 z_J&YBOMGM@JbV8ZecsoXtxBRA%L)gM&O|lB+>CY4{1Hx>iC%1a(YaYTE0{@4_kFngx6pd~h0cI?O^tT>59fY~A)x&4XiuFSg}(-7g9{R=e*7bDvb%{^Mn+ zKc^thD@tWSs9tMlul82fWfJxSG1<|lms{v9Cz%zUz@FKU%=IDgqRb)H)YZoIfBHw; zzh+}SyHuP^%+DiW^uJ~pQusY)z5taM#)10r`PxxzH0NVIk(Y8d%)H3#8WH$wQ4_oGFZnmL`$GnhQj)H?>HK zN1lSHUa^v~gUQDxL$#Poz=b}$1Qa3Lx%*ji?-$Wq-xgIPEs^znry*04`9`7hC)%1X z1-;&5=F4^~=TMmRX(k#;NtpS_UV_lAjy2s9AqM&V&;;HBGmliOfg+(q5QPXMAW%Pw 
zA)&o{b&wAS`c)IzVuPb?A&_{oXJBwM82)be=`DI0&#vq9m)NwQph_#_5jJ0z5Z&{8 zDeI8h_-z}CVe;c5gn%a*WO@md`$N)9XyxyHeedD3iUWGy4jy0(FP@AXc+wz~&OHkX z3cW9w!j9(9Ks*uL>r?%CbpiQ}@5~Oi36|YA5beM3bD&_z8(AIO7)y5c4yeRj2O|Lg zG|=>PVgLdE*$&}CMyEgV24jq{1DpUH931MR(~v5Xs72!OMOgcLUJpst_Ay8>h^OzZ z6Su`l5&-b>nrj1?JA-sGY^C_bA+rd)9_{sAm`WmWL)5}Gko_Xt> z?3!lCjGvO$DE$^Um|wLE9nl{f6jTe`HFMTYqjMuNPn6ZsF#17H=L16qqzb~{&03FM z_9RRMmxks=BUV%@rIqwi{kZlpM3gC=_D_axp1qYkkTwg&GI2kac zzuVMT>bGj0Io!)4PNfY`3-=Ti^>XcA-^<0m;HN*XQCS9{PzcT1HOO;Z{zHa!>;n;l z1n3;iK>d&~0=_p8DqGj16Cvp^9RQ2G!4M#hOXCh|cPP#>n*34PlPBe4+Qmn}Dvn`7 zv(#v_Ck(qTY6dIGho+2}{db$|s%GhgbCIry~io#-p6f;TjFZoZ$a)jfHY zS}?vq?)guD8hd(u=;;zyo1eR02OLGBOlMXq(WFaAjJy9w+9=?EqFtsw!+$9#C@{f< zE%#fu)a>l+p8F%gn2me;->``IBODdMp1cZ*ii)a8LfMJ&%HUTg{hdu1AD5Ew1v2Hs z_0!PL97SXUkt8D-lznXBvYLlP{)o>9Pi04)i$(NS?p(8@ot*h`hiW%w@IOmoop6`2 zP(#y#k<~+zDu}h9z}>lC+!^Ch_!Z!k6o$%UT1Z@S=)+0HLx(D`AeWl`pCXXHON>i% zSvUehiR7mQVT>RlfDodf+hrCc<)}eLliwU?XYo{*VS5%Uxtcw9r(|Rl>yolbCaM5| zJjs_wtxPn?Exv2u56RzaQG=vYZ#ZhKV7r7@fcQ=ExCl!uW4vWt3uH#*5h9UXpv~mU zB>N9PP4o^7jDvVlkqR^qOtM9?h^Gc!MS}`>(p%X5JKM}*L_ zU|#+X_k9g;*lY$v3XiusG^}trS;5o@SGR$+9t=~&r3rEz&cbgZmY}B+-;Kg0j_^M* z&_Rd`-lkrA*noszUKrnle?0SIhuJWVBZupjM{fZ=fB4~y*r3tz{lC2hOYly^|q-VHu2*n_0fSkIpm?1SLCN6FZHdN(l$O06xi=gDYdZ($l~r z#bOdy3%~a@joz=xUzxK{_a^Jse#<`la`;4FU$))l5&fM_j!#EM^s}Uczh%F!|2?4# zkFVU`ebUl7uN~{)9wf(?d)Bg^?uSwb6F0rxQBpKEpB^o!$2zWiNEmbia<#T}I6D_u z@T2~rh678_M>6Ypt{J&Lf+MOaB~;!FJeGe>%WApn=zJ#BNvYF}7T|mN>XjlD8;C}G zAD+vIh=}NOwMh$Fu3|h4fHXbp`iRl#>^r>7SBh&R2{H&sLTnJpB0znq}OfW%kuSJ8$dY)Yn<{{7N zZ-}XgfYPXR|8l8t+ytCYDmY$+e4)$J!89gh0IipE`BUd8$zga*?n0gZFt1*PG6nw}D4dkl^_jAv#Rx71fX# z2JsGwF3jh_;1D7-B3Va}mJ$O6Fknqsx*sD)pSXDYV*f|wl5Fvo+Do*$BD?>UWTjA%+!+HqR)mrcL?H`m7#C>et_2KBr zSGSL%nps>Q^?%Cqn&B83t8WB+iWvCvA&nbAkK1>i;eVB&224Dm1T%A zhvGAfN=&>->A&&pTZwUg*W08!RRD%ryYP$Jo7#?#U^FK znWmIj)vFz~l(tFG_NaZX=>PZ?`Mutj8tX%}Ons{s#l<-mr82F!6g%;m&*KuFdRMp- zosiKG@TSoZGw@mPotd6ZF*~GU*hKE5Fq3Cz*DYms-iC8#8Zwob)3ufZ4o#BzTk0eT 
z!_H!Tp8PWNE&^4N!D9-Am)@>2Mo5Kbn({r?=UqM1qlQ*0zE-TRR1;#Oz51)FlBR3P z`Y_fr4Vz3fv=9Sh@9m4Mn;9erTu*O`UnQ23Tif8!=gCOXLLU)v`i!ksMzdJ^`meK&?H}|%fG5hx?;!q(mUxdPczHl_(-`UvH$mXCOZ}PItH;h-w8@#xC zEkp;#g)-Z~$mKg{TrD+5aHD1Il@&9cu$FExuK|+oRvDkh!&v6a%8GG++h`%c%ex-} zkP1*mULheDZ1{9>D8}B6}ACuhEAck_5uZ{6tzB1R>rFggBN% zOJUgi0`o(MwnLq>r07>{Xp0vux(_8x@rB&wZEs3Tz4=DR#xl++XVxd+=z&UiGs3vY zOG9#X$Vz$U%YPDgvoNQEWV$fc#K)3RGd{dy=x*p0g%ku0a6e`dsdzG%wYC{`j>1EwDNi>P2H2oHx#|jqKOz^d!!D6>p&}1(4JoI;O zObizwwVgUOJn{>ia1R}4_5hcbQ=cpc9h!9Wx0Gbw4!+sLTvwqO+VjuT)Lph?F9yf^ z^|)u25JhfhccrpoiPE+b{B8myzASL193xPecWev)He`4ykqob2)YXwX!vZuSR?gm2 zHvj`5T{jor*A(ri5}~xG&m99-@+7(ljry`a7{%g&w@K!+sk$rqT{+*+Tb=Y1WWJf? zp`82#>$a)b1)Bq`wv68v{yN37sb4wycOH~X@GMU zxM_P`cs!+_ev6sKww&7ggnlc$sxzleZ1U@H*|Y8EQ^6U8fv|lx-LxqF@TYj^i7CH~ zUi%{keDM$Jjdn>~`iEo(!x8O?ZyaGhki`*Z1<*1wj{?^9Zbv^)*gVYniAMV21aZeHY=>q0L(H0X#$PjXrh(T zu0JE;GnqXZhhV}zhGlf5l;mdPnu$Wgm8Yzzs2u(MGn;N5B7cC=q?$<50t44xM&N@f zT!|>fPiWC#fF_+VW7GsZl5qXClWYS;MRy|Thvd-|rKM}nnT`Yc}4M)UbwYTg^gKdqn|_UD;c%*jZX}p&s8V~xd~zL zjjN*&;FT=0Vnc(DM7l(fq-emO7zQK#5iU?-^?nE3!dQUc&fdNQVNlg18wx7=tME;Kr_s&{<@25JB_QpXql5$HvE#dO*{UFb9A{%+r!rZPmktb8=)^i!#CYR$BQE~>j5_DkdQA1 z(_9;(0miwzyHojsc3*7ruTER+agO;dyA&!G7gtIYa&jBLztXZ30cy-es$1OiL}+WT z?9*!V-KhqfE~zG(O@G}JJ+{sIhB;e2zV}Ay)6LfFJRHtKmWt%F$DlI>H*Zz|bF2^Lla8nTdBvGZnAy33e zBnz2LU_$iun@^vPQ!8OVAWl%2T8M-YJi4w$5c1j@V-kTewFBQghW#IT#)fu*`BmW& z5gpLoIFEiT1s%xzusrnauZO%IgqZl=Aw*B2+4!M9@o(}fB3ks`ozK+YE|&kA*MQT$ zG{)^D=sMjlDFin}z@>noSS8qB&^kWEZeUX-1}y~%v?sQI3^C7!jbMR?eE?9Ao}6)W z0+K+eauj2a7FUyuebW1f{c#VGb|i?IWMP;1?GS_EQlWf_NLDC%Py{Gi6BJS3zQsG% zm~i&qh+riSYjh=h?Y=QK7&1JoX5vc$%)^gyfD#)fD)rn}>x4YsHOx=gaNj0q-^29H zYuIR?^4B~*^5F>c>uh)n|2eS$7rc4}A@Ripr{^nleo7^SP9xC6n&ArFtO+35Wxim; zh7AsFLAHDf@NR9fiA45aC-Mer;6t_sV2F-h1^sAF>j^TkmW;qqC`TfZcGp^dSI8+C zKY3r9L52xmOfceXZ;3+n&vLAFlh|dd6)fEF-wY>xbsqopWhX*Oul7%MA)u5*cMjqX zkw_?<@z6LlXkDDd|49u;EFB3wA&&%+q>|7b2>Bb-YcNza8p{Q(R{4%H$w%1M*d-1s z%oI+F-Z;d`J_?Aq8e&(HVFvwlI~G??XZ+6T37;LV>2NDygIXMf6e{`{cz17K-^av= 
zhM@%rD2XA{II=~eDxoj`UlN8WwzdF}6*8J0trITwSU>&s3>yd}%v(_E#ju0F<}^;@ zz)$Nr2jGDS;jJ;FB?d{RB#fsD-U*C9(7jj&BSj~Hh`!|JVl%d?=T=0Z6yr{+#2~dJ zw4pXwjsyA=q{ebMlzjR$o}^7w@<{FmBmfJfA{n%EckP~sBry?j9k4zSP-+S?!P|n^&U_8t>mEyUIVl4bb~~R{h!`ttUliT`Oo-8!<#MZ z$M+sLk{f_miVlyg21z}`ur1=d!gw1JkPe+7I_pHOV*+=dJQ2kduKgqxouI!kj_KMc z;R<^)=)#cVK*42Q4kQrqHX>1{EjF?a$IxStYP?72Dv1Mx{ZjYPgDvl&jIcUl#>>kq zhHwwF&j|dwNIng*8e<>Wyn8on^T=;9gZ7@4MS81GP%)K}v;=~m!HA|5AKHMs=Tc=~ zo5hZ09G7hCbD_mc_Jvf-aSn-VLh1+^ro#56(%uIlqdc|I+y-mFhmRbX#CYB~LA>lT zMbndAn~7l0XI(K;E^)j+Lhuceb`{ZkV`DYA*=+I4aGV13nfJQK;wCB{c}T~gW#$~K zQ#tec&71eo_y3_l&RrCo)SZahB1;H}STz(nhfX=iezfb^wN(mJJ2GQ+EydUxl?dc5 zbCeW9-jjbO&{wNZ{8=eDa9|U^pf>KNHv@7&P;GA42yUv$Ze=Q?=p-YDx+X}}#JkJF z9P{BKboaJIt%IVmpB{q9Jz`XWLobMk^}*5z2namZa!gyydeU50kO3mV?_v=|FQwOzZ$_YFTa8dHsxBr%$=keS$qAUo<|8myLYb zgS`?W$#)Q864HU2Ct1{a%84w(=g*(J74lUk=pCmHz|XV#tpNhYKLd=A9Zt8}5lalF|(FLz2>*U%yg^{8BPsn@PCs%IcB5CWP{$G;og^8C#?8*eym5F#^aT;R)a4CZz2i z@!Kv3(W{_VKSqL>;H)D!8L4|vIdrmEH!-IXiz9)%`;smb282Y*5H3MiHxSEp1C|Fl zI>3oAP(Kp#OZ?5``@#ytA|hsa6*!N`+yESRtFBb*A|V5|VJQVwEKuTP;v8FTfOO}2 zyGP@7Pp`6y`aChDzrV-eFE$$R{|@Ztxn0^1kvUCWoQsh4Hcbbc&9_Os!|0yKPxvuQYQF^m#7XUO$9co}qXm31CIyJct99 zYz3WFhloHJNBq(@+~?!RQ_j8r_jg{PJsaUru_cuN$$TfVO0JwMu=f%>J?a`ovP)wX z5ko`dku_wck%S-Af@IjnB%E=GVN)m7c3R^vQnabXgUBf>MNuU z;LEd)roZyz&dBem8cJj(4m*%tVqZ*r&&_l3Y}~mfMx7#igFR!G44w1t`>qL9&fx8{ z2@f_Tw{#oX%DLFjUcn{gSa#z`lsAJxT1D0*99bNUD$mI&3p62P?AwB}v8?jXEQI+U z&I^bkUEhk-@7b_@`*)-*MLulKg0{&+Ic{gd@S^wM{IfZl*0pz?wb&p0J;%-QXBn@> z(0GA)Djk&>l@HqL73uq5gp4X?#Ouzh*GTy5X3w?hzQIB3(O^_2aNJ`a4UFAcosOLj zJ$L_@X~%htf%h686#bQ%#AA)5Ld-OZY}gZmbOLHP3^+I`4bj(hDq;9Kg=4L z>Y#dnaf~d_%ihU6PZF-dxQ5z`kBnJxXV!Z?Rss>FC21?GA!|W>LmN>u(iKr00 z?IQ2;vz=j|ES0$6oBdnG@qhHMFgIKZ`wV!c(zzD7X!32g$hy(Xu-aU$4@G*=+39J7!(y(1GkI$v9XS7ces*Rb{OC(>nj`O?tH9@Q%L^d_G;cJn+r?n0&vOwM~y4 zJ8#fG|6=q#$?y2#C$;CuY$X)KP1H$np}UA9gV^**YAEE{^jB*xR6>rNh=*1davR|{ zjS6 z>pYKRKlXh;j`QrCH|xhNn`vbiIX%d{|CTL>m-LA)3@$jlEn?;phm=3+r}4oerKL?N 
z-HQ+_M#1kM?N8lP-Q9BhBOsTzcDJ$eR&e*q}0SYz-aJIc0F-e52gTtmWOK40J1AEg$?ab(UMn;Iw&B zLk|sgf6W}M8_ug8nqGS9R$?+vt+iX<3N6jl_4>L=2TGbG?>qIW+o@Bh4px>VR;=GO z=IyR$JuiK@-E*d8Q{#cgvMqN-ZPvf_^5vx^>S}7E+ZW?SQAO)DVJ->-EgAwnu0qJA?`{=sD~3$9 zaJH2H!@8A`%dQmXyJ~$k+ID&P@#9m~M}OUVZ{Uc3TR-ZL&SMlWkGykxe71^x)6xOm zk}Qf>w%rllv`PN`j7K95Eq>f_*}biq%I7}qO-)Tb2ucmvw8;j9tKPC@C<9X5aad*C zzMaYnk|b>HgBhxRigMnxfB!=iD~>vt5IA1Db?ZaK9Ytm36y~pNAO?>+@};I~k&#Ps z8$ODbv8zR-m;JD`AbD|3;!jqh>XuORsr# ztgpXpB^RD*)7r|n)%9~%6*p$LXrbC*NvD`EiwB|+ z1i^L<4O}?uX_9xPH8$2UMwx1HY{dnJ)}EW6({{|^QE7Ap)2-16wf8mlpatIE*C2vM zGy20ad;cHn|FNpZX6}mn*3Cyn)@|O~gpr=^{k4iTUuD{gi_@<@{;aoJkP0KTe;XL=IFG*Y!rl&Mkw~WWYK2g}egJO8ny6ul2ooiOV8}EM~Y2R_p@wkBG=M}Lt zjzoor%l8&rFo?ZDAVzod0ovKI953cjbMU@$&P0Ke6+T=N8EQb&zxAa zu0BXCd~$eg@8^Cy-yJ=EVO_uK$4<3cYyL>h$WY{kuhJ^-z_2LQHl}J!W4TTPi;RTV z)A`~8J%Sco)jA+d6z&@F-0>!C1ZW%rKz?TS$j7zKtp54F#t(Wvb~Nw!AMbnkWT)#! z^Ji|{s&RSX?b0I zj;ziay4Y`jtB8_8H?^Kc?i#DvbtnLhKfCWe*&4aD-=bdK4?k?#cf3m&IRMyMK)8djIjCGm@X5KN~kTH%Q^< zzZees=VCn6{lWFgo9^}=WjAZ)7!~tn3tLVbTs8P< z^?TLxZ0L7!4zr85o1C#)d3bwa$-_S~6@Ok|Ny~qH#>Yjs%5(Eyw5r`ZQQwpdHFWCL z{Gjo+&Eih=>b2(RY3;;*y@vKYYItv^zHI%IyT3g2?{Moht9<&hS)NlGbv=HRGD<_D zx$j_pZ2beVYfNse?cc7Q+_GgO3S(B&KfLY7KK~V(3gP2^UvmA|C^tLut;h)+S#vah zR-ymq%~)XBwT9(E@^}7@AoW-8wBgzF=dT$B=lAu!CerLQPfsOx_k7DMJ4XcD3>rST z;+gV#w|&rsTm)Sx6DKyWHrw@IUSqvC41Xz6}Wrv$C|57032yH`ffTsqJZO@O*g1 z(4j+5ojVtllr*sX>sJ+j|9@=0`cG^4mOXpy*fH1X(;Mg(H(p*frNXI@fy zyEmqc*)Xyaon+1rBmey_>Y8(Sn`@j5-bjiShd_`b=;3ZQVZ`rk`0HzjRjW?O8QkGG z9U1Awi4%sm3U29`2kbrkLmYIwunN1dtE+1u_wh};n^9job{+LZZ=3U(@&i8FmZ6KE zZqph6tkn6odE2lC*WDe|H>K6&pLRAL&|LG^H^*p%r(v|x;`|*mRYO?}ShmB5k6HdH z@*+t_k_wB9i%m5Ql}xAFHf_^AS}D9mmVCO3=j#06UJi~oZeg{YHfz?-l8Ust5220G zmgOXhVyEqU_Uw_9UcXjeEbGMP$jFF&+N6)PAnU??U&Rht{F02cfB$~7@Ss;M1~mV& zE?wWVH5Zn5+V=SkcaxQVMgdyzQqegl$^&}#?Q26xd*XV=SZ8M~DWGEXs|A-gcl=M6 z6BWIog_YHH*?p5-EiKg({lA|-VWq3RB_}e)#3Fjqfaam=8@3IzPLta-B&zZj`N&t> za{uSE>v~-)wcMTSr#JE+FB#fo`sKnRv%uH4Q&MClMlZ&mnH=zLRZPtEEouMh!8mCc z>*+NFcgaZu{HvX&C#*==vuD!(p#^JxN 
zKg(?3l!pHSu72Iv5}m&z`>)^M^7tPa^4F!D3;#QW|N6~n-~Sw_z@P5_DGI5pdukIB z*Jr@K=Dv9&ziWgz=D5(TO;jZwmhqqAR3a%}7x?-HkLb8&M8{k4QzlMq#gNr|FpXgE zYpe(u$&}4{R-X}E^+Wa+)|L}QM(pmT#P6?-i|foGdE(dtjO~3B&I5j8eN+!{I;Ub( z2g6;In5U^4RJF~TiJuM)Rl5DxUjiB3-rZZJw6xUDi@BAZwOx)(@ZuL2=Ifw* ziKnr1=gxx|Lkk}tZ@p=(zTsy2$PZFiP)MwbkME7xo_1H$QOuf#d6AhrcdqNadEp42 z({O<%HV(53vF zibYbeV$|pCmFuqfm0CW@b1)G`dCUYVBoFaJbTn^acNCvePPKR8^Isk0(hIXpH+lL77KKY(9?ce$#jQj2Ui-YR(ttY4ZS( zeN>)4eX2WPK%14dUt7>gyb1TltR-@xg=_NOy%X3}U_;o1t033=R0c-QGvhMz9?>-3Qd;2eG%(PDn#yG0AY6XX&IAz_Cw0l&vYiYs{o{lYaFsqPz=fgB4 zVc}hM@?>{f#WyhfT8n~H5G|z@A157~oZ$E*ub{P9;T9Ga60Xh9F5P|?=jSw=;%WEU>DW&m_SnHm>dNE15opC^oAJ1j% z#2>HIR2aUmL@c`w#mMOTl?L(JedcOGJ$$)*0@idIu7GbJ&fd)}DQVKbfB!fmGqc^p zE?7J13Q^*s?NgS`!($Vg61v}&%yjI*3kTQDe{w?mgv*t1`c#bdRy}0Cqz*732yW7q zWY1?_pFkk&vbUYSV*%Rh)3Y<1%zJTh6^T;HNL1Nx8?mJ69;P&c$Q#{8?&>S!aQZwa zEHT$y7Ll}}Acc#w9krRFXBXL6zOPprXii_-W+c@-<3Wog@{6pKe zU%GhFm9@qI)0P}jw}vhh(sIG_fe1Kh?8(tM2L*ol^Upt(3(lWEe=0uyuO)TX)+Mh_ zA3J<_9dDpZ-;<5H!!Y;x^EaJ4*Nd%~<^+yGY7e53Ly7j9N4}=k<@-NZR<7e1qiq{3 zqC>$C`dzy=LVvpNYv81cF52a#kta_RLxJ{?XzgW*jROn)E1BMx%bIXL?bd+0(O0{% zx{Pzuvfh#_^MXj^g|%B)9cZ7|U%Y6;{(c<$k^eXiAIvTfdXU`IbJdFVUs!N_4oE8o zZ^$B0F%|Hv-nw-d%1u6Nii0&g)Q65r z+?-ST`rW&R#LTJXRacw6`4Yo3GB!50AmyH7r|NIm?bE?LzW@_Xtn+F8 z?YXNeGURpc4R@L)mKcG;o#PyxFD>qhfSH?@Cu47KKfv#^#gjbsqzeQJSdXZ(mgTT1 z>@mw_%gi7JOChpcalMJcfS|YrgU9Cy@zzxf=)OBQ`;jud*HwI^3($(%m&%(!IBe`6d^VuEVy)TvXvztMy<@=U=< zC4O=(e?=+p^U=MMUqC&S^=13luU|`%IHnOhhO{-;JYKV52jS4^h?AHjE&#j<)Fmo| z0{h|7qZy;sGw=HKTUKr$Vi!BTUFXh9JmEBsaIa_Q5EI7lu^ggno}Wf_MqwkOv^b)M za>b3$#8H`@dB}$kb&ZS~eecQ=kW}95*Q-&YUSsp=eDmJ0hGqSyB6+Xl0}VSHSoQ6j zD=h;ZzSo55Mp?2P^{{XmQ1)0evQ5o01Qab}!E$+TY8F+w2N`U1r6xOWpo|6FWf;d@ zDN%jNSS-u*=wZTPbNb;sW8X}oe@fC#TvF0a$SMma%c!xrUlZ9!&#q*^rLyEksLndW zMW{%x5$uK}C|Q?mMA}x8Ueg^q#(5B#F2;S#4{=H!CEb|!M`?$Y4HTVU!SGIw{oxtZ zkzuSS4Yf|&hZ$6hfsPj!y$eq%gJuaMn<`6DNIA2!*98SBQizc{H&}*mTfe3zdojgc5&M*vlrKSxPa}5;H+tNI{53lCId% 
zViQ0ocjko3E|equG~h`;e0Qt|Kxn_u+?K=*mRuNYUnnv@uijq9f_<@`(rrwtD?uwG2x`xCrwRT!f6S9hwLYj|`jy8x`w|1@a z%$YMq8ezC*+#1C=(@d|+9<;_7JDhe)(_g<~eY>&w1wfe;(uyc0Kp$a+g|keU3+<5T zX0S9~$5a!1Dl-8nB?jU_*%SxZfu_cy{09Xd2CD>~Hj<&%>xDw3Nx$3?rNX49or zD@rRW#wyj+qU8G^^r)c zf}10gY#TXaM#sQoGs6S${)s}Fvz41HU%p%*JFY;IZexboF-;@F^w}nkWu1{nTVBwi zL+xI?cp?0=}r4;?UbE!6VD3CM+pwi+NQy=qe*2x3wMoCO>}- zu{R2=YuvZ*x}!(CFlKTS`QX~CS8eGcx3aYjv20y++H1}naZ=48#u33|SOb8?L7^EQ zJw7~-SE($?>3-&8`=~#L{n&-gVKrFOU>nA?dMpj3MQygLaEgxB?6+~p61oW+qCFU{ zKNY*vvG>%r1N;I>$cS7`ex}|uY=Wgo4o{yycLTj7s_yc!-Fml$zHfdE>ZzfQri!#| z+2s!vkvBhn>7cJK3l_Dci)lTOd+o!GI~b+bW#8SnDVO>{!KgKFK5m=dY)$i)p~}~A zuWZ@98{9dr(5#vMvUS)w%1h9(yF}dD8UmfLmKfY@th058sq@meuXsj?m!wNeN^US? z)R>jP{*Rfpy8W_Em>j7!d@Ou?dNI{D5S&wxa6Bw5%x6Q}ehaN5M+D5RfUm8a7m zZb6`tkq+A*=w|!Px?E&BK zFo~Xb+2_Mkbsm8TpXZE-5j}RPav=s~rg455AHPfoEg}S2<4yT}&meqW!+tCK@;zl~ z!J>LI*iNyK-*q zzH>}1UbA&;Q!$jA5D;k@tUN?Z+aa@q`FWy6Zed}pQ#bQh{A&Zr)bF#dlncJFYtaJ~ z+*s1;zu1WSJg>O;R?BrE3)M6=gK581+@xcEgQ-`iIV5o7cL6MlIZv8&{rYve$jHc$@bGblcX_HKMyN_@{gXotW|pZaKn2iyumMr`B;Z3O0fN8eTQz83=he{~3tip7n>6hM*5^DOFzMP=OVhZC3g`+ihCqq1psnd+-^+hHlTEO^M)Roj<*OJ1**Q`|%! zH6+68w{NQgrtgejXqu$6@cW=;KWh5nR}4eoeXna~IB#LA?zF_N+rPgpXDn=Fd0cED@;dtcyB&@^ovj$D1gssn4aQTW$iD))7}D z3FM}n8ew?}ljy~S1U*ATW#YzAG+dR-BdyDSh}aDtTuW;DR#oL$Zrif@^FJ82-yGs= zEA8&0xCjY}joqkY-j;ObOutFW%V#V`E%Sf@fydsx?AdaaZBHJOf&|MlY})fm9=AI+ zyL7PJw4MIV0~SpM zkssW%y_%*TG{2Lnm@I8P{XS1tFUSI<7&E<&Np$z{K@`zFsQE03@EfkhKP{F^bwjH1aF#79ByXl43ul{$8ssH7l zLQd4`WxO*C;=ywRzMlbVblHZ@Qy!?5PJ2%fk3biUF8-4q6@j{9X3lEr@amY$m9`X% z>5y>hL@{VN!CeucL=)?Y>+P1!=Q#|odv$5a$>+;B?=1-J^25iE0`@>t<`NPW8#i8? 
zoIGg2fC1tbp1ks-m9WGxgS1k;5keqtdc(-R9C1wWH#5) z*@U4>e#xg7gCSH-^X^UFd$jgk3(5WaN&mfmc?iEL(R1*i4dCLe2?B$Dd~?UauA6Ri z^WM6R|Lt#1DEHBpe)FOcD>&D*@X1LXHs1X+(}zP0yjwHi+0doe|LYxA3g1!pc}t_; z^s|#IPiAY|cKNxvfi%rKPEVrWFy-2{NCT@wZXy5eTh~qdn+w%+W@aWrZ3Tq?Pp>yI z{l8&{|96Psw8=;V$WoBm=N`|bn&!014cG`hkK2!=i1=xT8Q;lY^TwdA$|3|q)S{;N zYAgSK7${UH>MyKZW@5s^4wNUGD2?BnHOsn+FF=xrfDgRe|Kqw7>bd3n4Qa|~Kpkrd zyKs+zKl@SzRs14pc?(MM2btBw4O_Hmq1vjKB6Y_NR-X^F$$$KD2>4Wj^5+#6Dv~c~1e64=mR&+V1;~T}t0+RW?%cUkn0=u; zMa}UZ^LC9;;IOcmGuQ%G;3J0cT?b)p=oA}>C8J%}u8lr@`m{OIW41d2WD407#4ZNGoOHM&6Z-o8};vY7VkCusMJ zK1F{xzN8!jtdpno_MM4yxjR_jD$6GXS zzLrYhH5AdT?DJg(#^^Hs#6d!!DM!XT*3$SDNg8AbfE>mDJ$kh$>s*O1vmVB(;~d#{ z*b5W-VCLe8b^yT?AuJ4m7bz721B^hw^BY4$>jgv;fyXAsE5j9#&-!yTZ3w>}MP3%=_06w@rTQYR zvQk7qafK8qvJy4CD(?Ns$tzmIzih9*?47UB+{pRh!yO|!THy_&eiRh$i^ZLPtx+j0 zda2hidwUx+uKO(P<6q-K_s;V(Sf8xyexP|U-3QknK5T|$h^i10GDxb|U<_Abe}XA& zVrph4C<~5@oVNctr7rip-IHntG7GwCfzV<>=F|yU$7DonKy2&g%@w#j>Tq!+%EiSW zTQN{PlyMq0fI39UHJdgyVKVokjq^EXOGvsbw$D`hqOxN>r*fyfIqGCUK#j9kw&1DR zQ-0-z>Kr<_3eSi5G07NE7~w@1_U7%|*RHIqZ6OLLYAf$AZ$}DPE3S^s6P;+OxPsQS zkPRDBsc5__KTQ#G0>#XA66WSzyFwv91j4r~rm0GQ#K@7Qh4>LJDWEdQPA! 
z6Z~G#x^?Z0j7Chq^fUDgu3ibnSyQsIPEImnZq&rK&6))f^rAQgVJ8zqQVzA_JgZgQ zJpDm~rhaUvA5i;se6W}7nHk4G{;Lij?nLfxc2iYV^&Yt}!ap6XiCDgf#0kvkywH1>FEGxJD zSdrAauXLI3*nj>TPfmgq0w5rP3a9S6^q`g;Uh~)5h*oF z<24I`a-kb#OfVoi<^Fw#{6Vjh{Z%B$k=v*DA3b{XW@OJ-_i%)h;G-n@B+*`l5US5mf7&nZX)R{m&X@px{dBN;o) zmsgC4Gdi5QMZnnm_thyn*YR*!N+xuxv>CL**{e0MRkEOo=egOhgtnB9zyI&Dm~u~@ zpsJw7#_y-j zpI4B`Ed|cqzaMI~uwC7>foQ*Nibb31UH`Z1I-0atyLXfa!f|X@4u-#hB=ecBme-_R zVVZ?57YPPyr5t(6adtaTWy1-iT^EZg; z41!XfKfh#hWf!1X0GcF~Z4e?|8VpI^$B&H|^ix<=BpMA{ea*vvpxfnv%OngFg0422 zc+R$~aT5w{4u>P?%9iSAJskniaQq7Kod5F7w6lp@J288K#DxR?&+Y6%!Kk|(u zF!FM9B}qI+NM3O@0&=0FcL_YUOLb6P%_5=OV443$k3;mHHsWK#y0~X-)|+)dDmd$q zm{%9VV|WetQ106qznDAsia?htJXS>ZIHQ3BwOOO%6ON-!2vh$8PJ|c8&>=FfU27J< zof=|;yu7?{AUI$4GZ7Qn9lk?3%1NLBo#SZ_ll_-VS9x2Nhnx27X)ScR9o=2v5uD>z zfDRhGcZs0r*45Ru@T_7}NcF`&m8m4`HI8j4`Ru&<%zt%k^hw>_y*yWQ_`D>6C1acd zfEbbmXifQYx1Tw$+L)oGT6p3HV+~jUvoWAkJ88n%zOojq;YG*Z<>jqHsxrJi+mzv$ z;5+^H?d6eFvlg<Dg~0HqsXol4Lj!9`aQvS9pfP{eJ(Fg<$Mrx=_=(ybOEhe7ngqNC;)fA zMQ;smUwjJU6Si86EVFggOA)UO|Jq4K5QgT9C}!BcofHU5^L&wJS!E3Zso^b>+PPKr zPM!`MU}I%vWk<))9&6-$O2XRm*P?8?a|cs{6ns(qGKY0(+`D%gy1P(cW6F3@f#(nc z|B62;RN8zwC?|k9j<8?@NC2Gwe!HoyfMn(+}Lack7l4u3&3QMLuz3y&$La zOG>7)1{*eS-dwAPhwc6++HKo5#v0SUV@E?Hd$?8ty^JKX-|QuvswY_hk-z+YI{FEm zjNTS@(VC^~jl{Z*;qQkkAf{!dB5#@ZR4|?Y0#W>i6rF3@nlI*%p@UR9hTs2iaoSPW zaojr)v|q)CmLffiks%S4MGrcqSHRMY?Z=gfK#)21QD%xsRD&^wVIsJzZA1(?Lp0c1miBT$ThO-P$(F#^xlB2_r zN?}+L&uizNJpvP5T=vzFsecP7$0k3iq-8f3{tS>Vpq-$wd0-S=QFN%OS;p4n9dO}@ z7-6xb>XPQqpAYEL)76#mWD{l_OqKMvW}f0qK^kclaGRv@K^+c{(k5!>gJ`I9ud*j2 z{v30An3Gd8h8u3+BOXF^;8PF5)GDYod|4E_7Dd(@)P(c`qHP2x`DeWU;Gd{NXDu%* z$WVdi7(AoI4%qm8bB?vEr{^yBfxho#-h5=}MfCKg{>|JxJUqe+%DyNZ(WogXK7abO z!*^%fk#5@;Em}0@*^TGVCr%uX$O0uL^dR`fW%_4RN5w6%5ER+GAF$Bq^OHKjJVFZ0poGvZ&; z76CUFM5Ynq^nE*KB>P;$=68x}K-d9*rt65#c00Ol#4M;P^cuMTg&)(&T>M))3;ncL zb{`lj;R4%f>aEYYNXo5Sw~koS`OOZz|MZcq=777#LQC`TXb&70dj19-^QmzGu!7!S z-;ZU@HAIQOMu2d|AHo+m3K**1Eklbd2@iN`)zs7ivBTt2I}4eP;+Y4#NUE+d>ED=G(d*N7JO+|lixxZFlRj*kow0F+ 
z!YUqbf%8dJtfilNn)eD#F!op(bp6sovnI^7*$AA-#XpfsE4~`Hi5uev`GAtvP_$+~ z9J=4R>{B~kBK*0Z_V2X4mo8mO$(K2^h+Z5z9S%9HWKCZUQfhjN-tCwmc>>S3>gpq2 ztruD1Z_Gfyn`2(?ebryL_|tj}#IAtCrSF?ak9ke2ZgS0gZXUJFG4u(f@n9#OGi^E@A%0Ot1?Y3#- z#vv#x)T1$BKd3(yHh-O^G`7raOpqa`O%~={F-J88icYp8P>HT8QCh?PAF`~4td|0j ziEk8>er|So`DZn#!C_0!K7DS`vLx!`8h}A!W1CF4gXl~8ZxE&0PYJAK-u4jZv=j;? zdK-p*`}R%bPRv(7p`@L`|16;f-sn5*O*CAJ6bN_GJXWV~-@b5d3f*0jtr@H1DCWJY zJiKDa(3HW9*(aJ`*CR4rVZ~JVg~l4HA8a$JJ08 zN(X(uG^B~HPrZY*P)^LR!A#QGev_ZUZ2T=&pxDpMBZ?%f3fnDEbS;bUAaTKQd)+$Q zL32{UyFP7{e!`~+AHu4lqM~rP2wxMbS?{}EdwS1N;DVUQI&4nqh`pxja56i4d1{ca z4>|3Jx~J04qm3q^LP(o&_EqjmfyX8&k=tV8cb?tDPm-tG)vjdp*|`%$vEjFpE?*Wl zouJ_0X-rm7?%9Qp(n8q&f`c1ylH(umeA!ml=_w&t)+?Rf^m|Gc@N5y!r(jK#2rtD0 zPKJeq*p8`>^7k8N5aN$6F`4xM)W_ay%oGDTx&w_(Oa_BEa*@_ap#|TJ(kk~IIG_Xa z^U?l=8IwBg-M0__haxpi4dU3nCr|E<(YPEF(?KBdg9mjnJ~tzvGzgDDMP(>is}=s*9A;)R5Mg$ELZfFD~Cz)8h5V1{gQnXLRI=39StQQ8RU;*@y!l4VvYu6~nTwSkDw)+Xz^u4eZiZl{RF3*GN zNreO|dpHlDsT4@ip52x1?!;Wmu5Y+m^|od%sQKpI#>_{NeV8KDDBcO|=%iwu zF(HDdrl-48lYOK3yN5GH*iG4Qk7w1QF*=`{r4DJXhOYy&KrT4~P2@U~!fIj~EiKNG zf05ctDcMxDj3r#H4W6xlR%nxHT~UQ>_xGQLh*Yv{V5r$&ia?Xjszt%T$c!yqh#>PhLQR{v}j@k}LOK{}y@6FnD;OecQjZ@B` zQwdc_l2`##g2ozM_A8ZT!v?_%ZdK<=-Sb}iAMKtzmMmwa0D2@Eli9`oe3EA%ylBuL zVIYAN6fc-d;Y%jX3y$ z*Zn_owg2l4LjGUTS?Y83ufP6x4EO*4rTxBh|EKTTmd8R{6@oJqE(#g{tB>x@xAbbe zX7y?x6tMdZ*ZgzcIq4@{x`b>*19{&kiL@I%;}?_ksn1q9E5DG)T0mm{%k%3s*A;lQqg z9q39pmpGN;TQEqdW8lK@=gW9q1&LmNPv2UG2DF{|cY8FUVI^t(8o;uYiK~zs!iJ4U z=IJ@lrUa<9luV%fW?xb>lB&+2y_t$S2Z0-%HRivMRMvzY5TRK>ql-rh&1AH5z(6x&U^LXW`gd z&_7cu3Kmk)3XTloMh+^<)YMdrb#lHEFpzcYbVC@7f69+C8Fq=^m}({Zgs|-j2Z4mQ zK;@?{Dg{EN2`q~C3iT2N$TRh?KKF1w8HYr_9)9CG%-rM0kA<}cE5?z<2guW|ysfaQ z2w^_lqiAyhpw9v!~KE-ZfBRa`W*C(KO{iZ@N0qY*$x?CH}8 ze~q2j8~4}H4^kjR=J4<1f86Z$J$ZZw`Z*XCA{r3pdU>JSili&zYJ#)lb|y9+_WJ#M!`lA1UaKnsGxvoB#_beMRfHij^8$2FQHhc# zQ=m~!oG+n%JIJD9(FOu*%WW0V3UN*Iw^ib-5AcEl^o#Pu(i~CvDiB3KO>~@a&L6Y_ z+P9ie*ttJc?{R&$;}mscu_90d=~nIE|NeV-Uf?thOj=r6;@;9Z3zWZ#ck&c0kQmc| 
zI)4Lf?CRkm4`l>_CYF`x^+eI#$n)&-H5)inl*3u~#yxvlQSak(2^97yZn^2wvi5uT z?tM7#I(3j}0D}<~cj7s5iEx87IL6Q`&)QQH8xNXj?7yXY9(8Th&7TPy#=(J}m2k8-q7@*f_b z)It*EsJ6`^17yDH3;XtIOS*b`GNPd$p*63dAef9mEZ2ja=WYtN6d(5~hF;)qwD!v3 zw58s3q6tC(4}q9`L4$=r*PjV?0*)P7j{>T6sz!6 z+*05utwAd;J~tIEe-u}D_7Ka&g4+t;WbF6ZHF%j&_1Pb%fNZ=GX6xp}BP&Y0E&KO} z^HANgqKAp_D3oA4YrmQ|*caB^Rq*QJ9%}_@!n>yP{Dek|7YrKEjRRd`CnxkkQ4|$Y z+{!}|$8fg7s3wMBKs1S2E>MGv^lmA-SxnsIv6ed^Y!DwbH8mHJMMM!C<)LsVn(s$w z&@j~D7YQbCiIXg|+LRpu4*>k>b4hp%+J;hQp3b83}v!`Ecm8G7fVGwQcedc1ha1#uIUK>l4oaaB(=tk z8{1{Kb2%Il5dj|^`urPa*F_t;Gs2J-Ql;1Q>YZoawb`HZpq2l*Rr*tn6-&8C=f#~JRZEG{fGAlPiX zIse5)y+{3)R~)l^Mqr3OLwjY{yC#}Gs&D?p%I;YAjGm`3(r_3ex1BePtMj4cU^SX# zMiGLJ`d@jI{(?=n=!4Bm?_@Z%xDS+Db{}b*v}Wr!kc1q*F~5_3_2z%qAB{3+>PTWG zMfh^=16U)nti#H4tNKupwSzjM;;R=XBtDrMMV*X<+|gk3*I4-~_vWu2QLC|;#Z&@1 zBONBp#5AG2Dd^rqVs?ywZ}L=xuV{5Z{FNjjVMX}9=_&W-PxN-XB@g8SpxBqZ;?=qaacIw|n9{ef*yzS_%`arJ{VphDVS0g&c|4#y zRIRsqg4U}54Cln;%2%<&Tae|GDiw?Q(DaS84 z)lCuSb}!Ydo5HltKm_gyZ=tb#df;Mh$+NlDCc6&G zOu#u%N)W9~LRol}>3QJojuqlOq)}e`h{qOODY}q|Pn<*?_tg~Y!RBAx%vqx=RMMW! 
z9ByF9{>ciSPJR*{*QjQNbZ(--8BGaU`J*P`VaJnad&DLxs_NPgllm>iClGk7rTjwu z1Hpr*nqTq8BzB~0Ph;bZMf)7mVL~DW3XhCK(lTGqKi7O901CLsgSNZHV|Sf1 zM};gdT_w0~Aw}j7`s?K74I?pcfxUF~^({X>rAVIs&(9mBkx7P@=r-Lp=jDeM&IKQp zEq~0C2Z=LVZwm=g-ZAyl*xz*tU)Ss4$dx9H$$B?*s*Sz< z$*VnE39bSnLry|!(TqKrG7ciLD5uQxyYG+yb+ZedD;Wv1Z%gDU;fa!r8G8@&-WH*6 zj9s(McRdbzl~s<}T6xBF!z#hiL){1$2$TBjI!D~9EMis`LwC&u=Z55Gk@sS2iWJM_|6a!f0>l{lET`(^z1J*^>_9u4*vxFsvm*Q5#DH2&>Zv z7=pgGiB4nPV#`x2W;FC=MJsm=CmPk)Ajb8flWFUrD8X_Pfqnd})6IfNnt^p~vrA8i zagO&M55$Tm37}Jocj#Th6mKm#=3uf9tcDI%2>6!QF!;=i3NdwE9rpeF{9l%q5y@`F zI4GrDT7&Anf=u@}BM~-`VjT`CF!*-M$**ea{vrSJz;r&)TBu|Y3d$b>*7#ADUeFhjY97Ev(1)$ykA?3E@@DM z7^TFIbsRaciN}=Gfh)clV;KtJJIyDyp?q-EPa!j#z({j%rElHiJZMvw&ipa*O;6Wm zeZhEiZXxzio^JWHyGf%)L+}>};+R^e`OQRtaJwl3beNilhIHg}eV4QFE!s`q+@-O* z0Bt!dzBCFvX48wuWMjCsua$5Gyr| z3EgwJ#}5%BzCj8^_m6v2gk@5cj(kgWDS_sAuEJc1;?R(ya6{|9jj>WtIdtB=(MT7Y z3)3m((JQaX*AGp*!EB`hot4JckVRiN3{CR zYXelc$CzKp&&1LB1|bOMS84sAH&zH#1gdH7?rZR{y7L^r(Ssq5fwG)bhr}tYDjqXt zyhfX~6h>MvuSOJ&yuTP=0ClQ{4Wm4jhf~)#f~ZC)v7B#s!)9MDfB5H zV12$3VrXYnu3>r{;CkVS+95VJcy$F0Y-?*9hzcfLepn-N@q`FKo=Zb_&z@)XpPGH_ zJSDN0=&2#SM8QZf0Anx^9#^OsXCN><)hm^_Ph|QTgi)b5LmC5D#mFFfI`508FzG^L zp_VeyB{F@B3x+n4dc{42QjwGB&i>5(dCjBQY34gS@fJ`EQ_KFpJa@N-RYTK>iKn@uQAWF!3gYwS|9h)i$E>G(KlMz3j zBRHM#Vsc~jy|-4EY{%}TM=@bhC(&F3+~A%I2OQZJgHZ#~LkUkNT6v#DZufhZe(KMY zm3gZtPxWWmi>-b{+LW6k}s>1z0Y0@On z^3W%FYU&ZQ>giOw^I!qN=gq!#zfQ4$SEjxfu?oMMbNp;gQ3pfpM8S2mN}mZ|#I+ov zq0w$SJRqkv>jqi({7l9KNJC2xE*Zc)F&+u*M37L}oFFAR&KDUM(uoW;U+TfA8; zBioHwe+9*~z{e*QDbPpz&hjoWA%q_(kP;1h?9fi$Xt;S`1femBb}Z^UJti+m%y~Hy z-|~Y{3t>q#xl?fHs|mvF3uAo#w1nArDfY|Z9t;~+-S4G^2E-eZx$#-3!97C<0`?IjTdLWmle!a03LR0VDavBy8F+WYHbXcWCuc7UT5sWxVQs=IDuMoyTb|JLV=z||u zxZu^x#T>+7q&+9B?yo0e$VdT<(AT!H6yc{|6#|7AOoEk*!6cKPZWuuB_C7+qWTsmOXsO2(f#>U||BG{aHXOzW*8g-L7}<-s$Isc^djh3`~S|r}Ee0 zpI58tr!N+!oCAI9;1z{lf-!0^SW{pnkdG+tgg^t7e^kLNI|%%a9o&U?kl%tuU$bjh zOVKwAk@lu|=|gezf4pQFPcFWt)AHa=Qnj(LjU01Uhxla(l$ay)nGMp8j)Cc-=odUT z2a{Sb9441Z%YwH?1ZObX4G{LDPnGQ3qGw?e#<WNRcQd@=O_ 
zr{wjtv<8S9X+Y_PNn0oE`JQy}O?K_4vrlsT+ZKoxAk=(ZhK-3&!i-6=5N%^6O(@v| zbiGOMEj6{cPO>Tkqb7!%h}8o2VEV5a%&hB-8A4cah5o_WdAIUB^B5E?6`{V9QwRRA z?gn^d@U;b4Jm#)0rX6O->2ONKHSR#O_buL-W|WxaaK39GVdEOcel9}TJ47iIu_+U z<~mRy{^j?C-5hgix+pB1=FOSYoRm9Yc47OI##(W-y6#19e14`th5KDKrG$mqhRW?C=7EzQ?i$;Q=lR0O zvUY|LZvjykNzC_(1F1n-AJTN)%?;1#mYP)>F(ciH)Uie{C`mK`UZmL?Ic42`I1BW~g# z@0vDy`t%NrI*mC)fL{+)EnFvd--TS9XNqrY)AO&8C==M8T{gG~1AblCu3gh8*T2N; zFp8j)6hg@vcl;~|R<$v2h_Lst?cv=OF>wTvFbKNN8<X3v||@1%nK z<6P`(-l{o{6BI`(z(dB3L2vCCYc=*nB?c7b^ff#hH1Zuka<~z5pPm{#J)0;mKwR7j zqDQlnJgTRhwmmMV#mLO`Y3zt?Hz~KeNFh`vab7DC4mdX@F-Tt*;mn=V!58aPzmD3|?mfQab$j;90vZ4k3K4F4w zFTVo)o%i8`k^c+^e0K2~?&o*W$t7R_lQE`tehf-P7V2=^9suX5YEQd$ zi6Ik3-=E#QX_x1`o5KPevpM+gK34kRWmL$@-}@iFTSnwll2V>Mo8YAll-17?YE~X9 zq7(vA{$?VUc54ioVkJ+*HcTxiIx5pBG9v<1JnH^B@oC8@vbn%?!jT=!QQFws)FFP= z04f?ybOt%KW`aqS7$hhr;jsSH8PG%5^k6Zf4vlm4$9|pkf-trJDfA|K<5%N2*CcF2 z0nuMyDoJk2#~k;p@jBtNsSGC z%R7g(JzH?I8$Il88F2*@fAn&omW~b($VhBskxITcT*Z(DNf&OkJK~l;(5*|5nm$J) zeqhWKVn~n34L;9MU^VtiGpKOhF%2q$4i7qpZK=X(Y(ZqToB*`-lcerLHT9=R}aXU%-YnoGm!8&i7* z+A-$qtBc-Ni5FoG7l;`UMb_6rAZc`Pcb<})M5nf8HugXV1`^6xh|ULq>Iji&^n&1% znzLxf+xi&14+>pfZa`pQz!C`xe6tIHh^y2zndZD4ghV0P6Dx>bz?fu72e44Rc+$~1E#_g}E z8C%hMnp17Entom1ogQ^wJ*fCRlOWeDU0lrVm6V8HO@+>r2mooO5pl`4_|MVi9EX~B zAqt_^((&@~eF8jt>g9<8fh@*3UQARehuz7l9>>-__*FCbLrEQyjOMGl5BWtv z(dID1hTk*pj%h-pe-?;=sqJ9m{Vu(l+b$WRbEkCy6PS?Mi6LB1^#VO`A9G z#De?FxE|R+5$n|S-|5Ix_L9JvuaxpK;@QBZW!BYe${{MV$|xMC&74U-gt&{H+oeus z3yzl;O#K`tsiCfoGrTqE(&{5BJuDT)R7GGz>pmm$ec3FN^K9nu9`$QW3~ZbC>T>5P z22LG>d8G8di|?1 z6PC=JDfkwjg?W%6b|0r+_KRGwIx@DY20c_AI*g&>wvU_qV@fHi_PeK>YR*!chqgL? 
z<)D&MgMp*Y>NKTQiW(&Tkj*;qn4(CDA(B9**PWzNLXD) z$9XMWc*bB#;yFoI97H#|f77&8*zVlxLc8;;MoICdzZK`ULm|mAk4Hg}pLp>e_t7QAG4=qP0?u`rB zx`{zxg4Oit212$PK$q{_7Z)9{mreF;t-NqeXw8Mtnv&<8$NXuVS;}I|+Snu^^VTtS zHelavbvKTTJH8v~?U>Wz>T*wqgoV>Rbs#5t*%#jl?bB34sHjCR#*S*Etqp|4qal0z zq(Mf`O13H`?$Zt{>vD*)Gw$y1I%V(puh|K@efxQ=oL{El?met<$0QCKmUJm@Bitvm zhIh9`jT57iXE7WWf5<72rDqi)kBmeoU1offsxP}y9u}b!kDIOykQNE zE#tfzNKBSZBlivyTWEN2L*)NADxn7zglRZ;gXcZ;@dil_7!jM&8DzK8P<9xU!fiwAJU zX}fPxR)fGYQx03PVxSFV^~3WGezb1bY1Ceep+nc6TY)Gp1(I_jftPk41);zW^gORO z@eE^gMRj8F_?;b^WyE_NHZo!m{K5~zr*=E(;&g85Uh2l)Io>oh9inkpTKHq$t&`*| zxs2+m<1g$ZKqyE!AjRMZ!(tA~om_oMw?FtM{y=b_K7FLk`IV0I-rU^~vGS9ku^mdq z&~nKF-u%(y1jdjFMqX4LbAuksV3%#LSIDUj!471 zZ33g#j^1SYLz67Cb8}s#tK5ith%QBw12ON2ZYsi=DA<&}psk{F7Ey4dEXna-hu$KS zQ9UUc+>c$=K)4(QBM7%^m>-9aLT91v8eXK znKLPZ2k=G}cZ~6B#n3F;n2pKuLYe1)q=-AkI8Yk{chZHM71#~v>XcJj&=F87W-ih} zjdB`}@9O%?ms6hi<)=Axt-tWYiyn3Wli~Ma@OBh0Vq|yG-gD>}9BvTt{^s40$t$Wf zfF!}^y>M^*560dDp6j;%8~#a?mfetwtP)yer&J=5nKF`9Rv{#_BBYFjWHm@qBq3W$ zR(2vYJ3Bl3c^~RL@9X;C&vQSw*X!zaUgw$L_r$eROPT)nNY%^Ge()rreMXz{FeJIu__ z&s=t#>I?=i&5TYPxH|XTjTu z(0)YSf=ZF%in_U#WY56#dj)DO_mmMQ*ln9wC#L|lPUJF zML4^&okZ1Pv8jly?O=JvKu=FBtdv9OcW@}KqQC~DzFFgxp?fE8V%4M{5}e1<)T%D_ z9Rxsj6oz#x?G}K3Bm#>_Na0dz{qzm4xYgUb%ZFy~yIfAaRDDeJ^h`YDnsVIZ9pf!E zKs1y4l6aEC-G;BZIXOuL5ll-aGoX`I z&l#Qf8{ZR#ubA-L^_3efP#2ipD22s?;4%0}oC&M=we`@x5sYVkZc?{r&TnWG;vx9Z zyVHI8XIKl<z%fI+lXIolXp*o&vikm7e6X>7QIrBAtx37qR zO>-0f)J%9#&~vNLckMViIXijPMW!|=A4oc4If;dF$>oyhw-Ef8~{dF%T>W)S}Ci?21aP z>kHha&lSXGWo6aV&RJpm@xQ;;^_AEmT##tD6zB<|itYG46|Vcp@iGbec)1Wi>-ahK zPhNTf_Ms~sg>>M`WzK4l@FVyG0+aX+j7>~vDE8BXkomAbeafM@q4d%>$*H?bvHRB+ zpL|3t>#wx1ldGIdKv6<@>+G$KXxr(~QEZ?2IR22`yB8@ao1CV{Nc0mUzhib^)8><3 z{TFnUf(x~t9A1{(ZSam0$>(zPW#$V+B|yeLL0O3A`1A`)Chlb+S+BQQr6-m^k`0`) z09`B-{il|!BIzym^fza@Jn}Xe4{?pf1w490^Z3WAIoCF0T86J)p8E^n0DF-ORblj( zJ~B}XazDy^sK|EV5h+yw`F1O}TR9amu+cQ2h9e18=zyMLvw4q5AfosK7!L-Q4Ew2i zpnKSp-HTTM6$KfA0MPi*b%PHuIz`h8AhvUcvj0JALA*;uA#CoO7x5duR13W 
zR@D3s@})L5?iTl@#*2l2dc*=9#c>4r!&vKqO|lJ-sfjVZQiiGI%*;qUIw_QfJYNNN z){CC1af&bABIT?lcZ@jJNL!AOVg$X)Op6r6lyY5}1DvCX?<^CTY~ z9yEmKKf|RE;fgC#d{HblNl3F7*9zsIX3n-Hq_D}O(&C)k5?T9cpnp{=`~d~{5;*4G z0AL~$?7#|=6xCzCZ1+&9^Na`1Eyo~|mB|8AYovPV-_nY0q*7}9Hru~dJAhl7r8a8f zW?`Q`&)b}>L(LtJ2bqxloLpqy(pj6aYOYsjWK?w-O2q)~80wQs-0a2ca8miJ7Y3{& z85fw)M#xAsMujTp?2n~Bul_{6Fy~s_vqVLwSdB!p0iPvCccRRK!2Q7>TVS;sm$lA4 zn3|_2QA~2oWo(<71;bROz3g171;J4R2M_W5bS{eMjEvlH; zzJLGz0i}qm(M-%X(pzXMjaZW2V>o;Wq3u=fgJF`2qobOkTfvwStpo|VCpQcGN?OFM zzhdNdNc)p*-Uh2B=xB?5=sK;pHO6Y>k`hK?=wsFZD}jaOF7SLA5}KN{weY>=7tHe6 zs|u$M5h9QTUOb0aF!SKjqjKD`R%k3V8q_2F{Hj%R31y}_Hgcawm-VtU5sTp4-NM05 z65($^pg<0F)WvIXv2szVG4-;HEoVq;qz~b}d}82xiMMEM)(fVXP}Vy;pGal&`z|%O zDf*Z8Ia`Tf0%(&|9?_Vh39+|m$h5bn&aTM@pvFJ#M z0VF(RjL7SJzJ;|#mpOp6PJMZ^h`3tD1yGujixx>OWeReknp35L4Cvn|xv(^- z>0A?m01euX{ev2tu`go|*PK=#MnFhU+f5ihC23PA^5GQwh^pBt^9?53^D5}AolVuH z`~cpm{&Fk!E6|!9o6cE4r$e)?Z_hS#S-Y;dY<&v>t!(-7S?owmKHJr-L~lGJ9SAV$ z)f8;Q7v^t%-9B|@hFm4c1v+CDq+bDsk#c1 z1Vn;LVt%LwyAhBWM+qTOb2F? zCFAzvfbxKBuYwNW9YJs>hKrdxmNS?a&kHA|Tg$F{dXxaNND#fE<9Mqp=jdk z?Hynw>!>kN@XQ&3O`8%pVhYxDOcW0YJDtLTxeI6{{+#JgU6xivRoL*bNGIXojxl2v zum{`vxB?It$6O@p<=MPA_Ve{rM}XoRBw+Jli21XVl{mZQeH|fN+@C!^ZR)<&4_}cu zr-4i%`SJO$*}dRX79->QY=)h%Igw)ZmgsLz50X}0^d1rNkX5KF}4__wi5v)iTDRS4Vl8@Utwcd8+up-NJ89B zn$dfUA-t4acnjY#+nwGBT`?s3RNI%5h`eRA%sk?wztE78Bn%R_eMxh1(W-2Qvkb~7 zTEdzWH6jpbrc5Us1y_AH}c~$o3)BV_%zx`N`nP zkdr|Pmo{VeA1pUu4ZH1)mWtr%CB87|dnX~dIALc)-UBnqHv-L~f-w`Fm3;eeEB2hE zF5C$dMFXaMOPblqCR=QtUm_&V)vtiLg*3P59YfpgLLJXx7IlK7#M5U$2>y_sa7vSO z=P|8HlrcoUNQ73duVjvZdc0Tr

h&9)rU5BqDD66Ltt1z+?NLx^D{r(a>S4`yARE zMkDetu(LOr*1UwY{nI%cuNP1%1K22l`->P1Ucb_axRPO7%OB|L;f*ewG(VKZd&7Is8s16NzyIcUATs;{(;0EttL@}y&(_kvVn~U`rdXxVmFk*t4QAvIYGVK6-)Rd z40jY&uH=~LXp-DTaYay1$F30e1mDd{<0F%k0u>Xn8HuL&PeAC1>wBzW4bN|dwe}nL zeeYxPaGUS%$XNkEEum)f24rMzxalcN4VC74iW?}jV*?NY0nwzeqkN4ON7OfgMPE`M zLQ6<^J^&RlZ)QP5(wOhtyZ00N{wOPg%n}xMe0JcRkm!L$A~5ca1<1$P z7PQnPg#2aGR*}~#7O+K4)kgveIqga7ir_TM9@Jt#)1zLnFE>Ugw8^f2wv5} zXUKbYR7k2o2hUejwz;{vI~x9a4d8#O6Yx{ZFA8>|s$aZjIty}KP7kODL} zn%kYBAjCe$1JU9HDOYe>GkzH9bNF)pD>&iaWXE66+87dfx#7$IC`8(G$k0)%ZqA8 zb%MAQsSY7PCRSURU}ZhTOc&i^43(KlpA({dt6H3az%k){HGb-2*LVt98jZQtbSWD+ zZg_u|QV8ve=U1R-s0f>?CqT>--RBmO;1nD#7w{yvVGk!ko@WWMvp>Y6%Auo3fKY^`@kA%uF(!Ob-@rk)}87)}Hf+em%MXVwk*3(tG5DnBO-dAo*(*QRje0dkJ zGgMVN7Dik^&p03pnM@@->@0fsEe){TJ8IfeT6~9rBz(e?j=aJ}?`^bdSB0$yqr$}3(?qB?(yg?<5fIC3m0A%1?NwxHHLp-%BcHGaZN z3~xoOgBS1+Ucje8R3vAuxxL2OvyTYFQSUu;cqL&jFKx!g|Df0=#BZC-x8ZF2QKR7^ z?#Pu87h?V>Pgq!(4ED8rfZ@AHe)U(b9MW3F-g8^J{q3p!q#(fvEjhBcK=Me~U{De} zVY((`dl*l78nUpl@dL=?g1M9f@ZTaAf{|>WKpcklor#ASfU*s@<(ZI>O>@@6uat2|Ob)V7}o)d~+p6465s@I-l|=ZG5^g%xy(D-ksy z`^Th@Y!3c|xIafJ>sy>A!|O$XAYH)A(d2CH5dKss@agQibGzOZ4cL`o=a2RtrMST{ zcDOAL%!4xf+w}``rF@Ev2t%U-^GTEF1UJggRE`|shZpy4*Te0m=@U4Y@`_l;rn4}d zB+73kK=AlC;VAXnQ29>X**-JW(x~&&*|;OfZ<|}2{nn|O9SbhNaY+EbV}3mC_m%~8 z)$$Dw=05XNh{vABYx#Gk4lWN+D{`Hv-blJIkvKuaditeG82xl7;o~{Gazf9blePc2 zm@8j(XRGsXY07#?nA9~TA$;mtk?~TD;pf~0DBvE0d_31AE_na_hVIRa8)h>^xtx@h zfJ~C@bW6xyL?j4{?*$eYOWzX>n>Rkg9qa%+UrS3AmZJK(04nY&wsjm(hskmPNlE-Q zWY}MzIV8&hL@GxDj!tJg+Uo&AB`CA^G!+$Ib$I1o=D_mxsLr0(~$kzfhUUmiuEa5}F%Mpx$^G@-Vl_5}tGNq}Fbyt8~c4 zxM!RUpYUK3Fj3p%h>R+GPtW4W@as}gz!0%8JVp6{e4{&Ir$N&6TGYH03ig6(HP5Ic z_-!{NC1DcdlRhQS{ciy!5|$}0A^QTR4uVM9^BjW{fvHhboTq!Zw;VqrGjwY~e<=Ie zvzHS>J}$;w0nOXo-8OAIbaDF&9<@7ir+L-a)8^l-y;FO;cH~0Zg%wQwI?_jvE?af< zwCYXQ{k+OcqiWYH%U<3WV!yC*WWs$T z=ETMi`gqE|j;2|KOQCG|N_`iy4fY;;<`0QkHTq0?28Jy&0_i{5(TgHWF^1NGmtENw zI}5jnNC?gy`t|F7^bQhqtx&UWz!rhkWdKFU2poj+C&p#$U{9`t5c!BIxV{;8M`g^6-AiK-}k9spPC?~htpY0Z}PDEl~e?ea!LlaWrm5cpB{Al@3_^vHJ^ 
z?2Rw9f9^p0=f7Aa8w6dHmS*=FKH|C6iT7I#J?d z_WmOk-A9X$M6&R%Bmcsrq&xcUB(v)FC(C)RU)O^yfq(GN1@`>P35nZP8|%j3IQywYY&x%ps}M1)bG z+EmRVJv>&RYK_KK#`&7PGoA0xom76t^Y4PNx{ZbW3w`H4v>Q0ARa$Y7jw(|4-~U(= zGvl3d`TN`GyxPAD5Aw->*PzI~QloYsu|dT1`)TOdM=P7d0lCr@psQ$;?z=gnF--UQ zvoK{9I&263E*1>(@k!fun&8P@0r9uWYCJJD+2Vl{ue${B`LIh{~ZGz0T%7tJ&Y*3)S8KI)ZS2G&5Y+OE= zBrm~S1ugU12leui!H`ZyU*IX6R=N)fVX{CU--a^mODpLeb%AJ(#Wk#313&u^^#)j#p1 z#q#)eJFwkCLWy(<5*>|u`>gV3uZ7pwT|%MvUcKT&i%?iviujxmw6Gd>xYPJ~DSJtt z1yqkKip(lxzM?2n>6%!KtG#G|>KhXwpVm!!WM+y(zu;M9(1zXl)zy}g;=oB3K4w~q zQBvC3c<$s-Nr402fA$L5qda~}-bkaRkhmrsby|&^{ilmA?ic;sf+D@Ju&^Td^5TUi z*^%&igqy~v-G(gZUuLpIG8Jtwt(%RA)*2gWE*FST)afIaU@y){lR2H%;u+eb zA67`5-){J%T)y^QkvlYB7`|&+uO|hd*zw`KH*UyWYRJN@DQ? z^RCPHHub8OKem&JzwLUA@5d^+bW(31#tJ6NB#S1|!JGx>k_*~K?@Uui7!`OqV@zU&?J!BMo^kOu`CIRL)9}0P6HWHP z#GCq4yqpsY;hW3fj+yVbyvm0(VyX?*mYSz|j>V ze3=d2u`VQY$J@Rn4av+T?-eKUGR$i0XD}lPsXL)V~n1y%$o$THEKWXy(ZfmtZZcRXF@s=c6Ks zxZcaFbG?Ui5JAWngxB6Z#v$B%jRU?DK~kXUJ0RDTkVC zZ#l*@BHyc?EuQ%Fj)SUX^6>m&g`B@1@#&$PU(W(!{Cr;U4a#^5$)8<3=CI=J%0|(N zd;xnzjJlvYAosE&7M=q)Q{u*Ud%&8Rw(gsaBqE(mF zYTIog2RTe$;p2_uwQ)8ccpx>J3PSXDyuY6pSUzRpE(SU>T3DRAW zi2y0}XY>l!0M}uW>5{PuC^;x(++SQxF~A}S=xEW|`q%=ND6bdJXevV{*GYACX5=jA zmXySu96FAU6g?JSb)P^SHj4mEJ^F4YX`Bv%*zH832&Z~yXryrh9^;!NS-ec&a@D4vH?)F^# z87Myg1%b{JyT_Bo@pYk^ftbAvMF3&nfnat&^Of8=Iz!x2==b1HevarX7W))O7f8aUro7G82sxVaFdbPvFz#OS3!Hl5&qKGW8m2SP^^oEb!$ zT`3I~)aQ||hGm#&QnM5Jlc%53w1$|}|6RLu6In@lY6^&Ux>tL(uHnqk^WwKMMfak!+E9xRKGsvJ2mID!OM1LW$)Yh7`pqmVWjUj@VJxX9LaCtMi7Ud$vj ziOfF)lE8gNm0@uApRG3R1QUH!vu4tW9p#vo%bMl*{V#(?c^skpMuIB&e6&A3Qp*WG zXg4${F`L=JJu%R9+*X}gD0EFm)7so@35RfOXGW{` zy=2t!KpjPysO&R|7nV`TafM3s)BL&1S%ElnM0~`Wz?u?gH7Q;+jT_I^K{5N^*W||1 zSUaD{M`jjP?=E=>-VFVIa+KMt1sj+fI0gRf&;@+!~H)5YUmWfHAs%bYuHsvu5K|7+wJ`Z znt3?iO3)P7rI>Gqnk??hjM_kSu+9ZY{PR<=^;Cfe%yjJ|6Q+9#02Q9@&^BH4KQ46mlTvJ^8BxEh4%C2Saxa{d?Q-6;rF| zRpi|0TrU_hdpk~!Yc+=mg>#iY8p(?jsC+$>ztm%?zEEgUc{z8Z@QY8SOGA?%|H@e= 
z^ktAUGU#9^1i%n86r)rA2yxCvDQ8D7eJgA0jnI07dN6==o4}46?x|(a$YU{w22=ywpIUNEjJP5>BQdU`(ZlA_&7%(hjtvc0Sarj1nO`%S;yMXEp*OxQSLSSOjm z2Fb12sVbw8_V*AT4_&fKdB{zOX4#LQ{?9n`R#H6HvMt?~c|yM+B|njt>e|3S?CMOb zD~S_p`x|uIHmQ5Ne`P45Y*09c2O>Bl5|SngnvEc{785dn8Zz-p45|ib5fhhD#SGURR_E-=27N|4?3G zMFi8}5gU`n_rD@Od{CKOUv~9JE@lPnztmPYfjlLuUuYSuPzy7OIm1fO{UrE6_uo41 zt1itB*{lo6XD(JhJwU-EZd!rzYtPPXp!yLi&_$`p(oif&fhm--DbkkXiEMvrOW^xs zvk^x&tZ`Rlyf!rx^!}J8qr_Ybj?oJZ1GZwoJoRDFhl-BO+~lmu)WkSEzT5c- z4~kh}7)U`osidTo=mg-|I#8>@Oh`ZZY+TgtSjp(u0c#HE(;qW6M0kJPSGSxMM`g-_ zb>>f#+B$Lej=+uDPg&a~=AONLqtOkD%8BwD`n!iH2e|@b>*fKeX4Y9!a2_uW6WC51 zW@1^VP9J*wwB?>@@!$P8HiuEx@A@+yo(lB7I5IxsNwy!)5g6^TE+ zgeYtdL6zZcUS3w#m9sD>0MR)&AK%>%x4bnB2v!EMmk5{#GDZNwJnGX~yrqCxe*d_I z>|&f;-UB2%i--;cGJh>W9bjYT(iy0626mA^tsMAx2~hC$>(|j~(dg?J>5oNS58t@< zWxbf#TDed4eCLpNq6(#Enjwy z)EKnO)Z;jKehjvjt%!8Vv(Z?q-&>N9c+hrxc!rF1$1lBa^VuXrrun+dZ4M>MmD()3 z(VCc~sTPN=Z%w9XL~RFUW)t;aX(m5%*qR>Mb2)RFZASYhBfEXn32Ary7Tn&PZ+5(` zE$No>y5ao(P1qg8aWoj9NfV#xPXb-!2DdHx|AWX1g7yJpXx*`&Kxfl;A#fD{dV92& z6cXc$;q))6YsD~Y`RUW*N!21`TyI&3jEpQtTLr1d8o(D|GOX~|nZ@SOPj-X4GsMCm zk0u}s#NQQzHcIP`aJbeTZdYvMWQ?QfGyYQPG8QdzzlX=hqHukpvn^uN_OIe-JGC4JV=08n*S=6D!TSg6MTr$#W4=9gRE`7AjL*G`If$e*kk*5Auzh;XZ1x2v+l+8jY}MG@;!$P+6C+0{>RYJF zmw^~XK~45^h|gRSH?#awv_I|&fkx2ub(mAm4TdhqX=gQ?Q+aU*5J`D~tOw-iMe}iC zKeLNmG>8MDcM~7}#DFX-A9NCxaT)?dV+ApsYP@N%5m-!Abyx6D1^S-~bNu?-B4o`UmwP|(kQ7j6o6eNA{~A;-Z#cC3#o?P@H4pIK-KM;?ro&6g zfBKNVQdC#P21fts&jx~4^3Spx8S6b-s_jO!tIqm#MEjYSd z{5C!~*5jiEZ_1WF-ovA8eR1_4gbaB^PSMztcyTM_F z^WbCeTl@pR`BegM_V-7*0WiAF?Zw~x_t&5P$FKYU*RN0F*WU+ye*C8bMop=XQ#Aa^ z;^)F2P58~lec+#04+U$p`06&kTzWEE^7=}P>+mns7L}s1O}clrJDp4~vqGNd;tQLq z6>K;u`ba5y;4eqw=^1tcd;vG(vGta_72N*3y{VeU2E7qahq!lt8Xi>i{`>G=R?G2X zTJj7}aIs?E9@UBy{kU?~3SYHL8UK7?c4I4Rn-wx9^oxIy+=G{8UKJVZ?!JJjy!U4B zS`Jjj@8uep=|og+l$P~={LWVwSmu9~R)kqh$D-m>`Ilc4RAU#r?iF3UVj~Pi1AaQh zmED)dG5}ZHeciU_*!#=tr3al?XE?5;Og2m2*k&iC&aZ1(CiGJ5?)l`iA@Ip)XxlJh z!^%S#H`row8cR$zek6J9Vfu+@NI(7_)!t9I4}qH?H+2L$ETc!T_;ynXxD)7Q5f3>H!P 
ziO-Eyi%&LM6M#H=`PNfX3nu}%biZyc z{3XWwq1`A#wnJLSefc|020T1GKVoAd+Ry;Cq4p(+>6QL{o9;DUYrjX%PO^vl3?Q}p zF007>tuC|vxOXJ!RO8Y@c_)~#^H@;%DV-f1WQ+$oryXzu%K)jwd3_k|J5B6n;H)HT=6|L=P-?L-VMF+)t7i~hr(~*V-b$f! zRKpHy1M3F6x{hIXlAQSP`Mn|6&?un}F@Px$S5ahQ%?3eN=*1anIVcDS`-Nh3ohvv~ zG3~)8>Gt;amcZRf#R@V9CZ|?h>8m!WQMW4>Z)nSW={DtRY;dyL`;5<>P5Zd=XwX3U zg|d`))?2Sdz@mw=sBhFcoM1Hw(??Jcj=(dCPEgW!p{ArJNPTw}F9}OF8AZ%?!=5G>PfTdO!ZWpVsVCG|R+~d%-%K0s;wji68m5=Hc<6b*!Tmh?YeSqX*M#R@6Z!-%tfQMRDm<}Axah`kM^E{Mt4SGQB#?7%O z##B>0O>1IY_IZAlA!kr6V_pB3??Go{g}Th-qjQZC02c><3Wr>yE(JsdUK?cyv|8>i zbA7r{IJJWlM*t1$y5)>yYXcLzlqOh;AB65{@kS8J~fkx-Q(psX zzK>^P`?i+685wOp$#2oCJ!nlWes9B?ZsRl$^qCNV6VM3{#dcm^FIa8vp={a`TKaj5 zT_Y7^bIkj}^gbf91_ycJ*N+cALS&w}E4cAso$I_1zz_tGGsp~qgv*VRLQpl1XJ!)S zUL+&W&v27D z%LG^aM_TZL{HP~$w|M#e$a3w}yX~N9?rsqa3VxpBGo{LObJ^&HG^a81MweO%t*W>c zdaVm5`*S4Q6{dA9H4|Oo9be91v8CL%MW7W;wP5eJN=oBo1@-&yt6!bV3Ze>L^CPV` zW_@qd(8#$W;GF%#3!uM~t(= ziwb+Grf>I`>pb5%sPF_LE=Uja;h`P=^^0;Ib2+uFOAm-+jF-=$JPZv&q#a4*#G@W~ zcKdMFWb-1|=EWWH1CGRKm6$vaHlpIkz(q|Ay?~=rLLh-u0CK^m$-`d~R`E5s4068k zF+3BJF6vcUV13Qv5Qlh^s-Koj)c)yJngu_fcYW8~&CKj9;tOb%fsL(iv!QKiW^v661nE<^(-OichP55q@AiLrdd(X@hI%cgJterQu@1 z=;VfR{3T+R$P$4ev2WF9yX?HtF zO4Zylls=^q~GUTXBwLSWY8Gw{+a1b7fk zW0DNf+aEEr>fZkeFfCWW;;lDL;KZkcRBEq5p`W!*LON5{ai#bC-65d2IWa1oSrV01G)wMt18uL zOX6zi7%}mj>haNC-8Cy7rSfP^Y@h$Rr$7jl%Rb))54)DI(`Ht4SAjwLm_BSOU3xnx zQ`3?kXd1%TJOSZUqhp&NPjJ}-5(t4u>b(aNGDqZ4?B%WzZp=l+qZ8jJLc8Li%Gjfl z>$4%i`dJrR9!nQR?;3pHz7}-gxhE;nwiP?>dYaTQ3Ev@dIPj_skubGdfQ+{nc0^`Xvh|JM4F{EQX0KFG{kPEjNVNhhQJ&20J?noD( z$}`z&XEGJz*87KtYxhsal$4b%&n+m3(Hda+j<_6sGoGXH3~NH~z*_P~{pCIVjq2Z6i=JzY-f23*LKj;C0a8qFv4X-( ztW7A*t7Sfq54Ef4{`O@R?-&a^vSQtm_03U<`VDP$?v%`l3DxB~BJb;l9Tu9MdM_M| zZ&hCwF!&FeT4$VmcB6>Z4jcd=^aFs=BMn<;&=k^$^#B{JtgPx%FKX$7O*gTp&4^>+n(1l|>nmMz%ihqF_aC+=bTZKGCJBC$_at`%nR2H7D-w z$G@bP8sABY6t{m{KMhB>a?9?zKpoD3PlG2lwn@u;@)~8?8hkb`zVYk3vwQ}|220Lc z?Gw4Vq{(4&={DPDN2qkfY&UfaGD{28|0UfQ@;$>U`B>#%aUGdUEYuJ!T)Nkd* ztE}hPa2bf`45bhhBl9%lDkj{28%EjECj^+1_BCqWKM~hxEpTtH@n`c_`_An8r$6;> 
zR1Gczk45DkxHP;efXoCVJydrL>5`T6TKCHbzO&w+Wm1m?Zi?3Z!b{VUHpis1e4mCy z++OD{Kcp0J4EJy-+v#UTEQiNhR)gXW7U(d1Mwv@>bDvJp{u&`F`(O-Z$2Sd|M% z)sppc@|iQU`pO(fHcX$1_gRHI+N0}JQ2!aeF&Ln*Uj3M?QgCJfKfpmQ} zn^q?KM@Z?W;B*I#PQu}l3iL*6s5FGl@s~7(s0}~`oio!##SYnifl-nr$nYu%OZKM1 zbW{TMbsBV>q)9-J$)CLB)>nx~E14`M?n(PUIzq4^>VV4J!c#W_P4o?7a?j=rT3xx) zI^u)KF`G>Xi9a0_I}wWDoi~M+cGaaqHW*>wwUH8!| zLRva_7ZCqXns`VePsN%$!e>e1aoleZOGOe!lA$F+A@T#vR;&MUHeMB%s0oz~(u>bx ztO($`sO$~jd0kORoJo<2YwTWovFGE9va;YkFBH&fZ^Yezn=uNK2CXX}mO+|O4KNVy zk>l5{9VC3wI`Ll=bUB3MK|E?86nbca6?Ami01R-VhPN4)$10EAcgjHXJ7c7EuLl3j zDRl)(>&cL*W&H|8&uotPj#g!R%9=+5Otp5B^}Q3xnrFX;Mz!F;sdXGUjIPa0iAuyD z>Gt6}mvO%~CNob!YVONT`_bWbzq*U{H1-V-d!)T-IGXR{Yu0c(QbVBoJ9k8vuZ_za zgUZQw7b|bZL^$7>VRLyQ=yzo~V36#)b3rT-&O!J3gn!N4Q3Fnh8dm{8a~km8qQXMr zrbXp|xNt_qWqaPeYk=`kjYwla6T)ITI(A?OykBQ~XD}MCZtn_uijMP?A?72NLjqX< zdAcSZYoIA8?kt;ZYAuJdCL?S#3GhtlYjibe@v&|Ckzxj3U9=rXE?&GCGQ*x6`7bU; zV6_7wMHhdhQQf=NRqJNuPQU!!SnGGHdxD!x>}=fFw>pi{`HGLM#69VR)(Bk>1%e6$ z98mtja}e*ug{V0aFoG1qblZXA*Qw3L(62y%!S75)#-q zSfjVUS0Hg;c%h^wAHk|K>*#6JTEFBTv5&8;=oe?*o*eMmpl9{+BljX!zE-^=AH{5c zd31J@$a3$Cd~!}B=54(@FLgV*(d}pM%YN(F@*w@mE0=FI!`jbtjf~4u_T!Y7=v#V7 zUrP9xEscR{x2q__F~66FiH!02=J9PaV#*>1#v^2vmFbn4 zN_G5}LV>kx8GoAD{ax2W*JxGN`L6KFQ&Vjd2bOAzSw4LB;!Fz7asp? z@b#WO7CEn&{EhG}UTJ2MI%lX_%iGfoIvR?bd^|#Lls$>iT0eO7>cbTRe2N{{jB3=C z3sdU6nnF(XmtAGwoW4)IIC8uB?8;xNoIB`ZbqR2LL~p78BWe7Re*7<-8I`hk3w%^BxSP=wM2TDR38ip#yq&tHx6JJ|w^dP6P|mN^$P9mey@;!8CZC$VXF2_W z?V-ZLQjW{LW_rE7Q8bfD=kx_8qPv36hU+QHNhT~rd}=Ta=xM3)XgxizRaYmk_EaWH zN9iwSYC^EiUgp2uCcY(||MU)t1Xn3VHK{tr$d5iz;xxs&lpEVFGiZOz{JBYI!{Lif z6?3lw7C6L~y_veUwq(LMaON!a*=fPfJA!7mXKZ*v>%uR+5`A#5^n?X_PHvg6!`>-N zMJ|QerVY(yk#P-E3~kSPMCoEP#u!fhfo<9Tv-cvPu&j3Q14F~=*xnVP?bdYx_Z6r5 zwjN?|NmZe8%Ugd~#bw`ZE7h%n8`p>nKc)k2rWCJ~=Ncp_F7}x2*e$AORMW?PR|w~M zFleeX56KzNmtFNSJg7_4b}>EB?pw^S=B>QA%B8fZ{quX;O?#i4QVX$`Y_~NQ z%^58D)D~5Lt90$IwMI#8=Y>8rbH!WqmH*;#d9?hZDQ%Dj-c7gj4>zW! 
zt>;$6G1xgs&GGV`_MeZ&E|d}O#5-jsn!NO5*Kbz(BwDP?BFgFQY**Kevg6}nwJg2< zsO*R@?Jmx{K5GpOVkoF3x$qfQs8|&x)(?+fedd)Df(3H*a__y^)q$(;E~jpKbglZe zmvC$r&2K%s<%wd|-$U;Yjs1@5Ut(k-S@flpJMBdx%g1N4SXwLA3G1uns3PqZSF&*9 z|3cXpF}~i_96MOW)uRU%=?2+3qupN}YbzG$K}T2LP*vl4e#UurMFRjglXuljNwJA3?Ne5y-}pK1}XC-9@XjR)PP*V?-}l~Pf;n6jjG)K=@u^K>bqkfYqixFWTI^Gv~@84`__xd z=WY!*I4OP6Y>+gs^3;@IOLm3>#e-?c~AC} z$Lf1-c)Xr_>#c`Z6!RQytTV+M6_zhQHoZr%%J*3hGjqS3ackLCtnx6noCO0(eBt4V z`W@3pfA4*oC52V1r~31VjLAy8wN6~T;9&?Z+>Qw^GBZmSDGO4p5`2yPSTdnWo>%pL zHvQrGZ!J_YB~3n+l!2QP^Q*SG+4u0y?fm+waV(^qj^i%stPRdJ7ZTIbRlEQL4z+|VmsD=3hD+o3YepIdV9^Js;>NO)-F zp&@3;F^kqC$|-^QF2?IKIKn^nly$SU?R%ZMaGYkVh3u2#`!3_QSI4BSv*1!wSnuBp zSJYSenk~l!Gvamx7WVuX!A|ab_qm9g&l3#xW}*>f<4D=jao^l>C#fy^=nFsp*23K%?aMwn681ph&3C<;CuZtfil09T_KItm z-osZM$$Vpw|7V)#R}G?EPc4<`@8y3`6jOU3?rjQ+q}^xVoo^pBHx$i&tLG-+GGtW0 zDH?tD*GG@--6HCrOi(^bw%1Bp9301O)e;bWC@4#RA^__VG~V98FNKYyNSe72t? zj}l}V9us10d)fKOMzb<23_UVE3rlzJ-~;cZnZr=dK@)w4FeK0+yiWl-eko*1D226L zZ**8oD8Ks+Hl3gmmP6Yrs%5|6$YXJHpx@YWBlNUlO|1+-f7;CsmU{XEY&}e0?l*1x z(h^cO*x?vc5YxfwL>j-Uu zHR_U*mLmJQoqo?uJNmj_I9D8K^!7Xi)%B8<$@g+Ryf%w1vpP~hvE}Nd%h(srnX#{M zXY|g`c)oGDM|xlGSbltZ54H+CpupbdCFSMD$e)2}KNKWbB*=>g zagk+2N(-MXaY2g zyW#+gumPGEEr)?*ywwWf)Vy<7NK1x^#h3o zZ2BrbFk=iY+6%4!sCtQ`} zr+^h1D@IGQ7dQB+Yw^|X`$o+jXEZjo%m*G*eX{}w{zldw?*xz7M-Ke#Hkw&$cR;33 z)bZez$#_o;q2e8{`>VSlu}MIaPQkur5AEzvNInltoCyQ&uw+~Qea+rIYIMhtn`ltd zB{e)T(RJfE+s+h$Ov{7qgX!T5p<|5aQq2rOZZf4Gqr+AZ;V>wIbe~d4u}&x`yuK<@ z&{ZnWzu7VyG#we~V_qBZNb$c{V+)q6j-h#eLBS|QzHNhpcnCAwS8`A?r+XL8g+wJu zcn@g))xX=6kAd&3LQq0H98ROQYhvdBR31j))S_)jR`&5}j=34>C@ugqtS6J-?3s_` z+~8w;cJa_Kza{P|qDIQWfZICb*U!Gt8Q!}&Ej?UHBXzVpx5$y4n7@ZspXy>hDB*0Z z%qFJ6wbxvMJ~(y65Qq65y@;0&x3dSTC$QYM#~gwaa&n^GmgN2ex*`}d#PeZj2OmP| zV+ED(+-KXJ+CHi#vCL6Krp8;P!iX<>t)iQ$OBP1rvGXF`rHOVc&J3 z&+6>fi85_CMR_%C&IxUSk1m7-RFtIecPoE77M|J7TorI?Nd+;6y3^eWb5_YSnd{yf zN-{mq3lb4rOgHfUjc_=$o$$t%t0%Yt?%!O^1PtXe06D!X7fMnvh(5Z21$L{n7?*`X zQ*(Z#qbNVccZ(SFtq+Ib2_j;BqW_0z0Wp8^>5EYz344J{#->T2j1soDjm8KN#8< 
zpHX&6tq8e)1s!?Ew>rr{0ET=FV03$snGQQ&cuHtk*cYhj<$(BLCQcOKU)UEOB9<%m zgaXVq%9>b&TQvGE-)*C*U zxuI>id+$>2yX~#Bd!KVgYS-TNaBvrV9GH-|a{P=8wH;eO-Ium=?ALnk2%=AZ{_df4 zL`_*+&c64iA5OlYMeY#En7y%q&YyjJ;&LQ%9^Jt;S;BoX48_KS>y04Kzkmcf zp`^4P@h#AE3Q;iT@)4F73H?D9fJRSDZRf14L`d=DX6iynLvPXwY!ed>J`m^JeIsKq zAK+x)??lSg-l~u%wX2O=?Ghm`Hq6Z}*Ex*zkQnpM2)` z=%m!cQ8m3)pDibwHG2e?_V;1m4%8xE zm`o&A5^;!@ePhqm-lD}QCgb%VQ^H`w};?EjW zhO_!OL^d1uTv&<-50a2drX&-|xA<7a5#zM8mJD1cA$^t1g=i0J^vA$}0v>hZKRfgO z%615qlQ>RV;%$1(o)Va^v+Et1H{&i=nob+6z5d5-5kj*h##TihN`GC;Rh#Z_M3HoFidv2S@rChwT8&@ONRHAkALr04)x6X#Lj2n z%Im8AiwC77`A{bV-UL$*?*N^O>38ZU(4lz%IVfD`9jE{cn{E37?+pb}@YtdSYi;og zPOgW+JHHJ+_RI^T6W%VE6j~-`j1$?)aOsl|8PZxj@Jt&Y8btBm06Y$; zS6A&$dPgSE0k!c8$T7_OzS8xXTzb@04>*lo5TE+G4kh;`WIq4|tP@>;825#FT$Qj4 zcX*V#@dT0i#VGF!59WZ!WP@dyfd&BSjo|2^WK@?0Rn-9b2;$1FfdQSQe8f{!p07N2 z;FRtN(zbkME|`G>Vt?Umq?Zygci=S7?heu>qe!|k8K)pgCpBb@TGL9CTX>w{Pc~g- zQ|QKQ#c}-T=PN(7gZTh}hhpnGMy@JfuIfS-&;u_836hBT>mGd=LOgvKOh^L*a>H75 z_)9X|ejB{Jffq8+hGyf*F&`ty;7Cy1Z27Vx%?cd^(pLkuJ`6}>(S(~32W>n)?0>uTA!;Ua5KIa(RPze@HU)3zA6uX~A||mVke=({ zIHMPgmkW*~68Cs08E>p^a_BGemGTm{do;$O8b+RxcJ7&*`*jR{Irr>mF2CIkF*ApR zM5H|Tk7Hin(40yoLjq$BijEegsQ?L?=?i~aGYqA|_xYsb7Sz?v%1}5$xDy zb%oexmS0(zDO}*1piqX66xK7uk3aD+i3V9Bj_D+d3O+u6n9Ui@xI{%C!UWyb(^G<# zy16;_#6uYvMA2~C1GgP%6)sFS6duH$4f@Rf>cac?*PtQOy_Sjg*800LWXT)p#in+l#F|WP- z+N=uEaznmJn}x}eU+wSwvAgfPxPE(G$=gS=dhFwLUlKQk4%b@jbF@%CD0EEdkwIK6 z#NxXYWT=oM{v0W?E-2jPu`G94^^Rv_IT_a#xVBmzEd9y7%66gCU^bn5ojf(A;0_h{ zx|j5H7+m=Uc{k}`qw`wgYktWH_Z^-+hP7)u`RjF6orcENI(It3ECpEvh28g$S0`Ac zgEfyt**}!MWWI3wdRK`FYxU!Di-i~F!Uo6Mj&!l#C9Xhl)mENvJUyE?tI_6S&&d^> zdbT&gaH-ezfSeOsw?>}38XOk38H(69D4#fX-sHf6`4yL%^P+R+E1Y`r=4KG_Q=q<4 z-?}CK9I2kKKwm|`3)=Ak%%J6tZ5{$CqV00m%89|&4|uAi?d-h%@EGF2ZOsf}sB(Ctujz z;&b|Frb-UqqLGGbi8%YQV84=I=%3nb7KPuH1RD@*bsrQL6bypo64jbh=9CL)JW~>7 zkA0Jmxa;)KPuf;#07Rg=6MAk2d*_G>)h-GYsdh7e-QC%r&m}KGfs9k*T>HO$i>FV2 zHW%!_^wL1jx3;|&$5+q=V`%C??*jx{DQG(M*8CZ;N#zg8K-qQQ#Bda)olPk8x$_nt 
z89+s~R$w-go)!v%%BhSq(VPNfwq2k0ugu;ha$mxyhnJMKhd%7A(&Du~y9}jzqY7Vh z%0qY0)PIUZW%;>zhiF^xLHvA-@dyGBzOM1C2ea*d@~bp=&xhV_yVlXsv3Aep-LtPnpb|{U z?xY9cviH|HO}mIzONVK3<}gloWK_!L?$yR`&ZQkZG&E+>llzWj@tvbhq&Mxe)%}zB zXLHZY2!!0npKGhG;^F2V%5xD<(5gN`AWHal$G7;8SF`viu)R&)ZozH#<}(ZX+Ky(5 z8qSGbtP^3H=lloiCJ)tyFmZ*uDvCPXh>Oe6%i63)l^}mI ze!jooJa#4{J)PoszRwt3QR^5O4m&xCVAKBsZ!l-Wcx_EhWz&3bSnDXK2qZRDX#+4; zQ4~kyN$6Bh;N*-L4TaGj%$;@clEg88Ny;Zdw5BsM=WVEtAvM15=hvC7)M=hW^UK)L z6Q^}prbOjniGWqxY*J6arPPm&&Kh+-NUa)8DBhfwK#iTWl(F6I8`o{`WfHvAR(t>c zeU)pBSmy`LwnfD?Ntv3O#+P`@Zbv~pihub-pPuwR1w7ZS_dan^^QTxLdH%jAN|OvY zbQQ8WUbP7f4GF|YqMCc)i|*U&&cm`*XWd#J%Jtu~yR@@g_>j0j!@K@{@#SrP$Afv4 zP?f&u$fcK8o@=##$N1j+Kj8_&%u6Q{62W0y={TEny#KKN?(G5snF?2z3K!BRsr%iF zc+{b4O%+$O$x341l&}9Dj^4H>#~|_Pt*h|)(xCfh*-$~%5SNBp={ON=;Q#@=l9kv5 zQ6ZBIspJYBlZb>zPs2RVH>Q*al1i$rk@YJZ)ubTZUu&N--bRT5af?eh?L>5!Zu+>yXTl^zG?S2^{G-f z;~yVcT$Lm~tPL%`m+W~p^l@oFkQ@iWe#)0#-ovzsWv^z3X2Mx)+BlCzj;u0dShI#A z;LO_BAqmrNy7fv!B4$Ug=xNdRkt^M^+dA3od!y&V1NF!lp% zp3V}^tPAXSt&t-4+F6QO_P59hKp=IQzpSg%P#{vfV) zEG~~MBwPT0B&aAi6QU8({Uf#9J2^A+*}#WsW=?ALp|?wL2oQcaoO5s**WHKn3@ zGWA8AEOj9OkxQ`7X<~a8u&qdbiR=!U!6bnS{3T)~fT!m?vV#`Tu@t$ug5JG(LxtT1 zm9EkcA4Gpdre(~ZPWDuqYBCornMagEfsV=0SlHuks*OUHE|rtz?d@ApWySspDx$sC zE`8KJCG|avYbE%wpwDAo04bK<*(wo%xOFWwDM&NDD~z}%?#t9ZJ<$j{1$X{fjqp|l z&c2H#Ok1nOQpD7_U`Ah2gj+AK^WsbS1nl(78O6`o!JogVqv(Cp4>tD{pUcRYIlrXYFUed!eMBQYFo~Yg zZk*@v%neIBdYdPw?%%w$vh}AOSFTlvkR>wp}FGbgK7QKH&yLwtksH;bpZ@%rI-rwLmb-kqKC-eN1ZO+`N zG34RjhT=VEc1?7&7*0e_qooUDx@ZD(fY+32e8pq#wpa-IvNzE$YeJdw#Dx#++2IFF z3QXS6P=p>k=>9rt2TCc+l>6mS@i_~W(&s{V_nQS;dzoQ1((V0`8`+(|Mwlg8gIncq zi=5dUN-d;{gLpd#OQ#kVse;)N`TZzs@MfJ zg<7Q*JEM2?h4Toa7I9$(gcioFd#Cg~F#r@#oK9AUd{-VZH!^8XsFDdMmAoBEsuNdN zS63s-A>gsz1}e`F{71fw_*EXCb_ei<0$7omFpH21qho3wkTvJNorl;t&F}-Wi>oWD zPx5K5MCFYLn#{FEn*H9U^`T{M{83TDYPs^EL_>P|v%*cOc2Un!(|K3iQHi?ol;-(nJpic|ZcnVg8QqZ5c^mRS=&C#FC{!|%`&Sm5Sz@T1SB zCZ~@@PsXZ#74UBwnh$wXal6*hP={H2?Y>MSI$MVLJf~CJiIr<94UH{w9F}HXRmvgI 
zOTdL$eT7sWKv}&Q^ci!30GJL0=@@i1Dy)gh74!jW%GPV(p`edh-;+bFD@zPF0h}Tb?xye!=zNmn=_!jE8!V zb%cn!ijW2t5;Gffj1&?^zPgUoNl?Z83LNU|fx2t!NPvMS8EN3tx93Ok393-P-5_GO z8!ouj6!__bk!j;#C>?ET_xPy|UwW`v= z{0eQ$95}1pLLo4}3l|r@Z?|2eCtfpv6p(Pnh9MAITTeGKGD51AsP)DoGm`AZ2If+D zJaKn#|16^Wk~ccK$Ch=VqNPu4=kSP}u_yH$s@|d4jSi&-!~3QD?H#pC{~>iCHhWF<&`FMq; z1pOSMgfGgFbUoWw8yj5o6MFXfY@`125395ob7Y$;0cYs(H-37TVIb^?@1;Z3n|jPet^U9%DZ+5ZDm1Fd!!4aP!;wHO$QK zfDJ(PAC$oZ*qstz4@p%!6LdJphq4fCW_NfjM*}7nWOrf%RIz~b`mc5pv8Gn5G7zt+`~>Jw(#Pp>UhPRA6`0;ay=ulD*zF>t?Ce+b4Fmz-so+fD)owwMq8bOjl^>L42{0dM1X@V+1 z&!-cQ*(LKrJd4EUrX)VK6M0!T7UAAn+i+s&CEG9*=mhO+=+0Kzi5e;fLyD7TGNv9%o!kegQ12`$0`FN3U{5cg3D7KgU{0F0y9;okC9 ziM_$|i)@>npFaSqEJB<@m;Ql>eFEG4pG8!wc_6qTWn6HPP(uHP<&$(K$B&?CiN)is zy=87MJE3Qg(R>)gTx1xma|6*8YWG}lGe5@DfNS~AWGet@6t`Rpr2G8d?&Tt~By|11 zbqV*&m8L3-o|vnE{Bb9H`R(Pe^DlG`shb5UGsmy8WT%?bnyyKW;V$9Zx3?+ql}C8* zTVs*bw=M@;U*EAW&C3!GY1`+rp0!&EBL3_R0x3uPLY>yh?oJ;20E5y64AVsIX#dG~ zl^;IbM%;}mi3B;2crbPM+|2Fc(HM6;mN%BOLp(nvB=KbjZy+5TW5~q(iU`M9xuAs+ zu8W1!p-4f=?nNAMm(O2he_rTyuR7duVZh<@Gs{CTVciMN5(+nXoHP9G;dXX0yqcE%d40kqt1CBwLa3b z$CRvXn8$E+Da&yMh4VPGcfE)#*mvMSTKNF=Z+a$VFXd>^kT&=Q^u>qW_k&8P4HMQdnmoJ~P5juDqb}+e;vB*NoFF_Xyr7qr)Z7WI zj+R?TH1q*`PGJnPII5a&&}$`SVX|UyPl6Kj2^9i$#!>KRiOvVmGu$L2%t0pYCm7rH z(*lzG+fYA-|M3;z>Q`T1Ibw2S^;W`6BU`X$uH7Dpsf*18Yca5rP?OLfl}B5(Y*I#5 z5=kD``8xEOjWM(xYmX+e{RUAQBy7?*gyebNkcLJ z=37_JW;yPz(B;P7=HCYw6&bhhi~L%e^?qvYH?wc|mT%&n_n=9V2z@iVrT6WY_iy{6 zH_Bbz614UGxv8srkGLwFX|0UZADA`i)Dz`2z4<<8L;tvoZ%_vH+DFg$yNE~ppWdpZ zzHL4_4!C4#_}4kTogWh8>~u{GS)!gXHl^siyz;tir9d*H)(P|>e_M*8zKc#XyRT{>!-TXv_o?*VN+>L<~8U1_Y&n#v9(%7s(a_tpug1 zTd6_3)*m6PSTf}il}m?nnh$J2;G%Gs-@U~CWh+}+zKDG38NMk?Kc&>mkdUUmIp(}+<9nKi!YwZsYjzvPH4NhNvZ&i7N3=+vC!-zcxMLn``d8uRDIkN}PBrEi8e&x=LKJ4N^( zOR0L*N;`0^<@={{A4oLqEWh3i&Y4iSe5+38sxG_Cv1PWWdMKbAUyD#6a>B^Z%znOQ zEcc0Ul4z*Z6)#_?(bu=NMMBb9-(toPPfx!y;1wot67MRGNo*&KNCZ&|wT*C_HoCMSZmwuM@s{kcgwBV8Niwxz}Ph z=^&MVz_Iz%K_OF z;*opE_K>d~!S;-@&N=Y)5H@syxBzjM$)}`{t}vXVq}~MLSK14dnoEeI2#9z>(1hCr 
zb7g!`Cped>Mf&t)jo23+jpwj@IUq=W5omne&!5(41D|PK>3fcMg#Op)<-vG*+MIsb za{JtGvpM`@yf%lkSq^+zG!58OIVi|nGjLZ?QD>UaI?7qr(E46rfGYc`D+R3G-mM{( zt6%aRPj*QkKgz1D6xMOKeO=Du%&lyXXM5N8T?Mh(DG#;&aq;`xC{gv#4*eGlgq|&q z-dDDllzZIgN4FA$1~y@Q>(@wpaNXeWa$-f?4qN<(x=)~Mklqf!M#fk;useMj8d8Gd zCI>Q$4MLWi+5JDoF~g9sXHRq4w_j&U)~;~&8{OkR2VzN&OoMgmsX{T6Z&sg{#>nog zWq*I|n3JdV!mh*EB+t#?vztHoP3&N>McO*tRfu+cv6h?7q(q%gb0015Q8Rf^-IAcyqA3{YpJ|9^| z32zcwKl-EboU~pwdznlV)uR3=EBW<0nj=3Lq!#-s3UjGHq$u8^dPP7o*B=e-&GoQh?>Alg6v5r;X+u&M=n=kF^ zhJM)mUe)&G0Q-)rcdh3iow{(6;?-@%sEA+LKZdw`r^_oYbkxvp-GBGz_5&Zv9v3bT zHai$E;Ur3N2* zIt&Cq?kc|xvH5=5$;YnV9D||!jJ_(@e`Oy?F_U`tmjCRVw9S76&wCX|lmtbS!~!bH zq#BnxKFrQd-U>ye33G?;+!mJj60)XBnDnpzeE&_YsX*4^soeTSjKiYdJf03X8`!g(@s3_iF+zs*~AYW+%@T!G=I+Uu-ToSB}Tcc2-C+E8d$ zd+%^*BXxOGSXoj&Gdh56Ze80t1 |wcB z4eg{o8*C>jWWO1GEdA(om)6k)RX8_JhEr{T8?kWh=z~HwaxmSuu$h-K!v%&X=2cbe z`}=}!E3Af;nz#9w4ra>EMM1#6iEJ`K|4!$k*#X=jLV~{r1~|+!Hcp{yM1eZ3rCe!F z@2JWsyo~58A4BxR?q6Ep>dVO-slR8%*06*t);#qpQJ1u6LY1i3z|!y`R4f!$km z*PNMdAN8WQ$YZl%OAS15ZK6KgY9vOx1bI zxvXa4`&x{WyOXZmzi+b7K~#q7`O1;qNp5hIDLi0Vna8Nkz|b%%xM>4(cNJU)6HmXU zoi!G@?O=RA=!Z*k$d;Rq{jv(@`}%gKTo@p`$m30peM92DoG`uAIsWI`5oKlc)5JLI68=TkAt8Qo85|i^jj~l-SRDlSW9i3%u$hht+QcoOW@%Tl6-HPXD%9Ua`#L=%6S+X^znbh z{<|i^eI6A>6gu#sC~%r#aueNfiOuBCN7A>yt=M?byIRO+|EK^`#_z!Tp(O=%2~`X$ z#;;3|BLp+$Uy~BDIX7Y=$T2x~@AlrWlgooN*D|M@86d;Trol$lhIPCu4u6>}Olf{Q zJmM`K)i=Q!lO`Y;vhSf#`B4; zTc6xEpU7I2U^&EG+>uz+J3Mo0AFRmbW0|F6w|6InTHBoBzLOBTO=UkZIE>C8Pqf=4 zH+<}}QA_>K&Ek?zj2OfwOLus@d8mGPXx_$RS&#o$wfH>!>r%^B)qvmQq|@|}T4JGN z>;Y&o)~)xypnzK|n6O3ahOXSXSvmsGs~b3VdZt9AV8*6;%1KTUBa^*hK%f8$kerxM zrJP5yLP|(^P2(@{oPmn{_eum%>l`>HJ@qciKBfH?DIy0gUoNkSp?6c*Zgfr*0?xoFMSxJPg+w)k822Vx>)N8 z-H@5$dmg@3zaSxCp9(X!j|24#GP1WMDAm@>;tnnQ#^*uPV@Bs$veo4}x}Xtm`n;2e z#;$a2vdaY}Vek93P;)zS#3F*?Dyv@|U$ zx=N_gd7z0#jFW?#L-70SLFK)>v6nqF%pDKp-#Q6mJX8HNtgz@l>dNVt2+@nW6uRs| z@5aGm>Ms%4OX(F zuIv3M=4ai$&8hfId4EVGvsu^wT`7Ly?JDDyVq|K@aXmf57TXW|2wohcO(3n2(r`G| 
zM@NDuJbS*mazFvV!a4{L@@Clzvhpy-)yG^K|X!sw70$-s^}BW~1|P%N3bfYqO<3gc&P^;Ih@X>OZ}Qv4AQRp<3lCo_`!PJsz{8|r#^ zQEmC&mdJqydy;P2t!0C`EFdH$ca>0njF&gQq01NM*RN?&T#u z6l#Lz(*q{9i2C{ZzUpI2yuir%E;y}yZR3jyM@LsCrgiJ6iqK3r)65VrgzPYW15TMm zM=?yZxo=d?_N2Zg#v9^crigleGdW|}j&DtC9MC{8pWKUzxSSimJKY=Q4yMl1=rv00 zmsWqnh669QF$_|%B}U!N4`wt(E-oC8)%nYQlKALBojkT)gA{r;)p3DaVFlM;zgky9 z@pE*tr&~zx;&1X$fx$AOeV|jU$N9Xx7o@F1(*aen;v`0#w)ak;MtdCrP(-Sw+?K^A%aecTl$qKuir*nXfKAeZ zE=RF}O@hUJHQNDwB8ad>zBS;1?bW1|n!&g4n~9*Uy{LPNWlz;r|0(k2hTAuUmD}sK z8fmDorY0$Tt*Dnf^QkNt!R}4E(YkbD6)UuplG3yFG%~4tVNW9-p~wfeJg!5P_drH*9d!o!Mm{41?&REuK((ur<|*qX_}GJEP=# zedBYiOz*d@>BPKOr)52k9|5!2i%K0FHsY@x6 zuW|@^73LMG*clGM27w;qta|}u%!6Bx$MBl2caY#7mX-HuR0@YYb zT3!C(i?y{GpeI~!JbzY_OYiaAMPXkHgL*n4cuspRKdtW_MwXYCDHyWhhkwp<&(*~J z+*Y^A|D?oZ%piIA;uC*=ry|XpTU%ni2+RM4<@{}ES90tmC62BCV)rI>(YWqV!wf=6aul&AmH+r&0+f7 zpCleu;i1#JS)p`$34K9CBmcM%UsAZlp-{ECxA%VIA1a1(@V$gLpKuUwv=gM+mL4^O zNPq<8J3b%F`T?7bhq4u$gKpjZVHWh>iaLqkJjo81>EC{&c52K8<{@sPQI z8+|UEF&)Tf#ths{B_ci@rG(Do^ zA8^dGKBXU*!`Q-Q{?(E`M&cl^N*%u!NQ~3O0{WbCYiYz6rSB;eEx?Ecc(|JMCGipu}T|p_PMiG?P6;$WlQeq_+s?v zVjKsop$I0VW0jTi%6ke5@JydrnctEfBTr=s3;_HHqYlv-c6#iN>drR}2Ef1&P7?N$ zdKBx6)0a>y)K|777Iy7W!-O^*W!KZ`=vTc))7ffL*Y=|A@YskQxO4kAT_!_wHn+H6 zVObb*jBpv;aOd4?Ziat%wu7faQLWpCO1=aE-JrG}1GT_CSExrxN0@@tg)cif1?99p zf6k95;2j!otBX)9q@$}Y_o;JPOk;k-1PELAr0oV?9HzdZNkv9Iy?SfCMXd+_LB&0= zb=17MYqRY>!50zl?NudV&Tr`b`Z4EM&ZgeA>g&Zx3l&aAqeIo4J9Ze8tdDN=vHSy; z)sri}#s1Fv?!6U*#r9j%uXFO8dggMwa(y}QtfLB+dc;yC5gT*Ko;%8yWUgIB=Q15V z2{Hm@fA%lTBs!FxZR}vldXWo+A+ebecgN1CV<8h|Kgt&_u@bgB?=H`AvroQ|X)sCA z*w$KP1x$q!c?r7LWU%OCNc6@4l!c-XjlTD2BEQGP9b$M!7d__1FboL?&|ss$pg>{U zue4}h?7$#3cX#)vN|BXl>}xe<9s8R1PX+n|iV>L3p=vUN(0NBsbC`um_o}~Pa@o1z zTisc?SCBHzm(>5DJ~lJ0(Gs#t{h7W7A-BYo(}V06QW#Q2dltvl@=H%g-wfVS;TlD9 zL0m0Xvk~u$ApU~+e38F0@4HbE=+bXZtCkcC+0k~SNX=9YNhfw~> zIz3)KGp!A**E6l;7O|(+M;0ob*+1gkZgZd6BABFIg~+9=d*DRRgA0~Sx?$6&~YD(bQz%|^DYo}Vf2elwKNN{T3AAyULKpke~`G9DbeHtQkV@%BQ9_# z=`BGMK<)0nP?j&eqh9i#x}Sz8gQ}-2f>u+1DC2~iRk-Tkft~UoS0w&OkKnKa+wXp( 
zH{E1(Na5*qlDIO4P1MdmFAS|29AbS7d@niJQOrQpa=PCoi!FQz>Y}GnqR5N+4%YZk zRHaub-Bjg33F}iv^K$vP7){b1TK~$=D(a*rn|Q=% zyg>+!G5Hb1C}s7>Kfl3?O$ z<3(nuU{ZJ7^^dtxqOh5glkWQTclBK0<=i7IWl$0|kliG)DyEy1N~K@&%9if2cA6aw zaM9Hce5-Bd2f&9H1K2Si!H#i`mZ>g$9IY#Q=+458l zBW3#&LmV{DY%G28`F)wh_=j^{{wFx9n#UDryy=Ix;aZZ$1>NXQWu*{=8@B~kTldV% zdrUP*A5Ice)fD)GoaS@?E}shSXQN#a3T`d_azMYlc|5zm=~Luc>b_{2kFm;sC=2`4 zno?wS9xcKx7P;MkEP+URx7Cs(wP=ubO&BZt?%CAV(ieR9M8{{&B>IO0aeme3{dk}b zfD6wh*DLRe4GV6n`XK=54rf`#`A2&4defO>ySCS&ELnT{8DD-~+2La~`|>jOdrmgQ zeth}+?5{50B=iJTATv0~$5lWSDIZj8ej#fS``@r5UJgM&NItys*J=A>mMyFN9OT+J z>tVb1W(VhXarSTRj8|XE`Saq2&A_G7H33RYiW$|Xtrdh4V>82-c5q5GyJ*#i)#uZ2 zIB#!bhl$1bLV(1HIRWEc^~CM(aAuKis0*4%ozdS68}MVXNhbK|Dd%-ce%o;Ms=0jc zjG_d4v#nbqLmfW~iP%>$KG>LleRg!bP%!^EyFly}#A`gzpU)_L6Fnq;;N_JC3z52OR-GLc$MbdUu2jror+cCdG`5zWtOn( z+VuUr6m5etNz|V5M^x3<9b&>hY_ISr_$E+HEp@_IEI39;7edLI_x&D2y68V#7fP|e zxGp=ciNUz95=pK)q0Xno{O5Z6HdUxt-|Jo;w$HHRyxxmqMc4Ra9Y#y_9N5GMWn9W* zX0ukz2;by5qU>`vXt09B2+fLLp=sV% z^-iO?=9j7V#_%JDNS&N1dG4o;M*Y%wEhHmrY2HW}zQy1@nI~@R1H9n(s5gJ%&MI}# z)#cX3|K1ZD%x_Y>SFbqHTTfldu>Mz1VR5k3`&18(QM2V%8Bvw6^V zG83)`P(n6<|9%m~Z>sa-9hfGLBeymhu@T~Z>`KAbwCov2&NI{3XxzwtxTt&1r1_@U zi*VVmcia8fuRpCR5Opff>zhRMahzQ85WB*LM&=4(@FB*2XgI`w0%NGs3xh~0@PEcx zMuh(cnvz)$N26VmUcBa!JQ8%*JTd&qr?A)EE5feRrYlh|;|;$~pT1Afe0=Azv%CRG zG95a{i?7!Dpjx?k#-&0guUtmqU8rN^oAdlU03m_YqLzKvH=NR}{|7oK!g{!om$Nkp zQs(Ul)oD_0oqi&$=u?r_^aq}$C16cRH-95#f^v^yn(A9511SAsYAOXV6(?h^*M`l% zbz(>?em$J;m8O-H9nyNCE@6+1e`#+TliDXU)oy`ed)yd|YrbdmeX+P+Z~kwiE@tsk z;f_SqD?W|kp|v*ZzIyKJRTt+k2aGps*eU4V9KXkRW+XHLW%Donfiz*NY(MRV0%$i~ zez0P^^Xg4}Awp|6oXIG29R!H#PG6E&9(-%+dnYE6)69CzZz^((^+ z8Y!hhg7|+5ZnCL0E<6JRQ`O1~Z}|$@1GoZs9>uJ#T6H0sj_fk0@RCyv#fMuQPfEE- z1P}`zm@u}}L&CRL;vqWU+onKpSb)PMqOkK`qqbZ{a5}nf2RNM2=)?4*0ILc#wkd-u zcl}OcrpX4gfSbzq~^>x43s0jX{WwJXg=lMj8L6^1LKep1DI@F{idg1)+Zd z;KW)C2q$BgQzi5^R&~F8n@O>Ai|8wNtI+p1rG~EyuI?!nuH_M;***6`&Qa~_##X5& zwZ`#c`xP{;b_ZGhsDNDYn z-L*2TUxH2HY%&Lz)JBS-+F-7O$JKd&84pRhZzmC7cWjo=qc@rliRJfp*ilm 
zz@2xw&Qj7}?f$Bh+WV7k6wM+J+E23O>PYf%0dhbxE+OE<8`GAhk7J5-#TbhtGc$!Qs(##s<6G<*53r%Pb~}$Snd>>+ZkcGX5-qj6H$Pr)=qe*1`~=nSs43w!lc}WX_`pCJ9}p3FRx<~5$XCsQ z;vCGs;Sd$As8mWWBtiBHc0H}C817T>Uvr~TT^u&0tk;T zp!>;BcydCQR&=Dk6(cRlu80-s@^np>cd85xT=u6X^YTXD-HK?lR`C6kQ0ILDcmE!| zLPoX_f8KBIAv3NgJ`u3Oh;qSmk+-~>f3$dsvHV8Hb|Y@}@FI03+okMmzan<=bhtK& zMVSX9I&v5d}ezPIcY?+Ci_qa^Qn5DH9S7SU(gmI zGqYy3%+V=6o|@Hq4r(XjeEwk{{;7%n?u?$m2K7zzMy84dV8^`QGpN*Rqc&F7RwzHN zDeAqt#ERn!rQT#G(~7(b88HQ|Pp5D>jwX0zHNHnf3Oj)gu`E!7Ii1P=Hz8^m+J#uRzCsVP?|zFFF)7$my; zQdpz+Lph<}b7(5F-nkjE+b!>e@+|$EtRODy)WBJ0YDIOsXH6Z!uBTy7z0HG8P$oi9 zQHj^H<@N10!d9SB+Z_3K12pA-L518G>lLQ6Ogkz$8u*KAE3TZZrzKbQsOc6IWm3P1 z6Z9p}W`Bw@UNvDFYN8S7(yZL$lD*w?up*BXx+9szA297r8OPfRU;DcyyqNN+8L!{| zr6UcRq@zn8#TSOOmQDp)#!5fhV3mxVf!ALFFEI4!h~ zEr`S1B70$!lQc)ZgkNJ@#8jjy`$r7p1apX?jvxVZe6i#(^0i>tS& zfzPuG8X`H&9A*--cQM;$N9WOTnF`j641B3+Lv?yBvI(@MV&&X>F{N{=C zsUgn(K=L4Z*YUf-oi((+XnYIR=a2^k7t-qY>k<+&F zOZ{iOY6OzT9vxOMxSQi1bFn#(t%Y}zr`=qMjV<-jMrW?>7w*hLtzX=IM~B)pg>O`}(pcFY519p_WousB;k49g_`W5sC8+Ilj4fxmB_zUkBpZGgkIl2%e-YsG!DZe#h%SYNLmaL?< z?Oo^0aCK1Z-8)&N%YBW=*%;$967E$|ZMdXK5poo{VBb5GS&o z-``@fVDN`|ezZ?SNr>&#{?&*!2+p=!Q>IslYSZ7~PmIeVg_j-0z`|l=qqT`IMI9rpb)j`2$tEJThNsrtEeyb7nY0Kc0JaTT|z)9j2+9Xg$gb zdC&jAdTKRJJg-J24y&v#7 zaAkJ!jujou8=r^rdOvDzO(Y6rGIKV%kot#os%N*!y<{~fqJ-L4+`b~;?(LQEz;UdhNR;y?otCWR z8)IeaTHfcLZ{59fTf8;pdh6ROC@p;t7wzMvB`-(Ok$lPGNN$3=sLs?Iv3)Ji<@tgw zYg@XTGZWceiqiDuwk9{n`+OQuwUoJ8Ih5a%Ku?-E6Ym0?)v8io&S2lyKGIdKQv

h&9)rU5BqDD66Ltt1z+?NLx^D{r(a>S4`yARE zMkDetu(LOr*1UwY{nI%cuNP1%1K22l`->P1Ucb_axRPO7%OB|L;f*ewG(VKZd&7Is8s16NzyIcUATs;{(;0EttL@}y&(_kvVn~U`rdXxVmFk*t4QAvIYGVK6-)Rd z40jY&uH=~LXp-DTaYay1$F30e1mDd{<0F%k0u>Xn8HuL&PeAC1>wBzW4bN|dwe}nL zeeYxPaGUS%$XNkEEum)f24rMzxalcN4VC74iW?}jV*?NY0nwzeqkN4ON7OfgMPE`M zLQ6<^J^&RlZ)QP5(wOhtyZ00N{wOPg%n}xMe0JcRkm!L$A~5ca1<1$P z7PQnPg#2aGR*}~#7O+K4)kgveIqga7ir_TM9@Jt#)1zLnFE>Ugw8^f2wv5} zXUKbYR7k2o2hUejwz;{vI~x9a4d8#O6Yx{ZFA8>|s$aZjIty}KP7kODL} zn%kYBAjCe$1JU9HDOYe>GkzH9bNF)pD>&iaWXE66+87dfx#7$IC`8(G$k0)%ZqA8 zb%MAQsSY7PCRSURU}ZhTOc&i^43(KlpA({dt6H3az%k){HGb-2*LVt98jZQtbSWD+ zZg_u|QV8ve=U1R-s0f>?CqT>--RBmO;1nD#7w{yvVGk!ko@WWMvp>Y6%Auo3fKY^`@kA%uF(!Ob-@rk)}87)}Hf+em%MXVwk*3(tG5DnBO-dAo*(*QRje0dkJ zGgMVN7Dik^&p03pnM@@->@0fsEe){TJ8IfeT6~9rBz(e?j=aJ}?`^bdSB0$yqr$}3(?qB?(yg?<5fIC3m0A%1?NwxHHLp-%BcHGaZN z3~xoOgBS1+Ucje8R3vAuxxL2OvyTYFQSUu;cqL&jFKx!g|Df0=#BZC-x8ZF2QKR7^ z?#Pu87h?V>Pgq!(4ED8rfZ@AHe)U(b9MW3F-g8^J{q3p!q#(fvEjhBcK=Me~U{De} zVY((`dl*l78nUpl@dL=?g1M9f@ZTaAf{|>WKpcklor#ASfU*s@<(ZI>O>@@6uat2|Ob)V7}o)d~+p6465s@I-l|=ZG5^g%xy(D-ksy z`^Th@Y!3c|xIafJ>sy>A!|O$XAYH)A(d2CH5dKss@agQibGzOZ4cL`o=a2RtrMST{ zcDOAL%!4xf+w}``rF@Ev2t%U-^GTEF1UJggRE`|shZpy4*Te0m=@U4Y@`_l;rn4}d zB+73kK=AlC;VAXnQ29>X**-JW(x~&&*|;OfZ<|}2{nn|O9SbhNaY+EbV}3mC_m%~8 z)$$Dw=05XNh{vABYx#Gk4lWN+D{`Hv-blJIkvKuaditeG82xl7;o~{Gazf9blePc2 zm@8j(XRGsXY07#?nA9~TA$;mtk?~TD;pf~0DBvE0d_31AE_na_hVIRa8)h>^xtx@h zfJ~C@bW6xyL?j4{?*$eYOWzX>n>Rkg9qa%+UrS3AmZJK(04nY&wsjm(hskmPNlE-Q zWY}MzIV8&hL@GxDj!tJg+Uo&AB`CA^G!+$Ib$I1o=D_mxsLr0(~$kzfhUUmiuEa5}F%Mpx$^G@-Vl_5}tGNq}Fbyt8~c4 zxM!RUpYUK3Fj3p%h>R+GPtW4W@as}gz!0%8JVp6{e4{&Ir$N&6TGYH03ig6(HP5Ic z_-!{NC1DcdlRhQS{ciy!5|$}0A^QTR4uVM9^BjW{fvHhboTq!Zw;VqrGjwY~e<=Ie zvzHS>J}$;w0nOXo-8OAIbaDF&9<@7ir+L-a)8^l-y;FO;cH~0Zg%wQwI?_jvE?af< zwCYXQ{k+OcqiWYH%U<3WV!yC*WWs$T z=ETMi`gqE|j;2|KOQCG|N_`iy4fY;;<`0QkHTq0?28Jy&0_i{5(TgHWF^1NGmtENw zI}5jnNC?gy`t|F7^bQhqtx&UWz!rhkWdKFU2poj+C&p#$U{9`t5c!BIxV{;8M`g^6-AiK-}k9spPC?~htpY0Z}PDEl~e?ea!LlaWrm5cpB{Al@3_^vHJ^ 
z?2Rw9f9^p0=f7Aa8w6dHmS*=FKH|C6iT7I#J?d z_WmOk-A9X$M6&R%Bmcsrq&xcUB(v)FC(C)RU)O^yfq(GN1@`>P35nZP8|%j3IQywYY&x%ps}M1)bG z+EmRVJv>&RYK_KK#`&7PGoA0xom76t^Y4PNx{ZbW3w`H4v>Q0ARa$Y7jw(|4-~U(= zGvl3d`TN`GyxPAD5Aw->*PzI~QloYsu|dT1`)TOdM=P7d0lCr@psQ$;?z=gnF--UQ zvoK{9I&263E*1>(@k!fun&8P@0r9uWYCJJD+2Vl{ue${B`LIh{~ZGz0T%7tJ&Y*3)S8KI)ZS2G&5Y+OE= zBrm~S1ugU12leui!H`ZyU*IX6R=N)fVX{CU--a^mODpLeb%AJ(#Wk#313&u^^#)j#p1 z#q#)eJFwkCLWy(<5*>|u`>gV3uZ7pwT|%MvUcKT&i%?iviujxmw6Gd>xYPJ~DSJtt z1yqkKip(lxzM?2n>6%!KtG#G|>KhXwpVm!!WM+y(zu;M9(1zXl)zy}g;=oB3K4w~q zQBvC3c<$s-Nr402fA$L5qda~}-bkaRkhmrsby|&^{ilmA?ic;sf+D@Ju&^Td^5TUi z*^%&igqy~v-G(gZUuLpIG8Jtwt(%RA)*2gWE*FST)afIaU@y){lR2H%;u+eb zA67`5-){J%T)y^QkvlYB7`|&+uO|hd*zw`KH*UyWYRJN@DQ? z^RCPHHub8OKem&JzwLUA@5d^+bW(31#tJ6NB#S1|!JGx>k_*~K?@Uui7!`OqV@zU&?J!BMo^kOu`CIRL)9}0P6HWHP z#GCq4yqpsY;hW3fj+yVbyvm0(VyX?*mYSz|j>V ze3=d2u`VQY$J@Rn4av+T?-eKUGR$i0XD}lPsXL)V~n1y%$o$THEKWXy(ZfmtZZcRXF@s=c6Ks zxZcaFbG?Ui5JAWngxB6Z#v$B%jRU?DK~kXUJ0RDTkVC zZ#l*@BHyc?EuQ%Fj)SUX^6>m&g`B@1@#&$PU(W(!{Cr;U4a#^5$)8<3=CI=J%0|(N zd;xnzjJlvYAosE&7M=q)Q{u*Ud%&8Rw(gsaBqE(mF zYTIog2RTe$;p2_uwQ)8ccpx>J3PSXDyuY6pSUzRpE(SU>T3DRAW zi2y0}XY>l!0M}uW>5{PuC^;x(++SQxF~A}S=xEW|`q%=ND6bdJXevV{*GYACX5=jA zmXySu96FAU6g?JSb)P^SHj4mEJ^F4YX`Bv%*zH832&Z~yXryrh9^;!NS-ec&a@D4vH?)F^# z87Myg1%b{JyT_Bo@pYk^ftbAvMF3&nfnat&^Of8=Iz!x2==b1HevarX7W))O7f8aUro7G82sxVaFdbPvFz#OS3!Hl5&qKGW8m2SP^^oEb!$ zT`3I~)aQ||hGm#&QnM5Jlc%53w1$|}|6RLu6In@lY6^&Ux>tL(uHnqk^WwKMMfak!+E9xRKGsvJ2mID!OM1LW$)Yh7`pqmVWjUj@VJxX9LaCtMi7Ud$vj ziOfF)lE8gNm0@uApRG3R1QUH!vu4tW9p#vo%bMl*{V#(?c^skpMuIB&e6&A3Qp*WG zXg4${F`L=JJu%R9+*X}gD0EFm)7so@35RfOXGW{` zy=2t!KpjPysO&R|7nV`TafM3s)BL&1S%ElnM0~`Wz?u?gH7Q;+jT_I^K{5N^*W||1 zSUaD{M`jjP?=E=>-VFVIa+KMt1sj+fI0gRf&;@+!~H)5YUmWfHAs%bYuHsvu5K|7+wJ`Z znt3?iO3)P7rI>Gqnk??hjM_kSu+9ZY{PR<=^;Cfe%yjJ|6Q+9#02Q9@&^BH4KQ46mlTvJ^8BxEh4%C2Saxa{d?Q-6;rF| zRpi|0TrU_hdpk~!Yc+=mg>#iY8p(?jsC+$>ztm%?zEEgUc{z8Z@QY8SOGA?%|H@e= 
z^ktAUGU#9^1i%n86r)rA2yxCvDQ8D7eJgA0jnI07dN6==o4}46?x|(a$YU{w22=ywpIUNEjJP5>BQdU`(ZlA_&7%(hjtvc0Sarj1nO`%S;yMXEp*OxQSLSSOjm z2Fb12sVbw8_V*AT4_&fKdB{zOX4#LQ{?9n`R#H6HvMt?~c|yM+B|njt>e|3S?CMOb zD~S_p`x|uIHmQ5Ne`P45Y*09c2O>Bl5|SngnvEc{785dn8Zz-p45|ib5fhhD#SGURR_E-=27N|4?3G zMFi8}5gU`n_rD@Od{CKOUv~9JE@lPnztmPYfjlLuUuYSuPzy7OIm1fO{UrE6_uo41 zt1itB*{lo6XD(JhJwU-EZd!rzYtPPXp!yLi&_$`p(oif&fhm--DbkkXiEMvrOW^xs zvk^x&tZ`Rlyf!rx^!}J8qr_Ybj?oJZ1GZwoJoRDFhl-BO+~lmu)WkSEzT5c- z4~kh}7)U`osidTo=mg-|I#8>@Oh`ZZY+TgtSjp(u0c#HE(;qW6M0kJPSGSxMM`g-_ zb>>f#+B$Lej=+uDPg&a~=AONLqtOkD%8BwD`n!iH2e|@b>*fKeX4Y9!a2_uW6WC51 zW@1^VP9J*wwB?>@@!$P8HiuEx@A@+yo(lB7I5IxsNwy!)5g6^TE+ zgeYtdL6zZcUS3w#m9sD>0MR)&AK%>%x4bnB2v!EMmk5{#GDZNwJnGX~yrqCxe*d_I z>|&f;-UB2%i--;cGJh>W9bjYT(iy0626mA^tsMAx2~hC$>(|j~(dg?J>5oNS58t@< zWxbf#TDed4eCLpNq6(#Enjwy z)EKnO)Z;jKehjvjt%!8Vv(Z?q-&>N9c+hrxc!rF1$1lBa^VuXrrun+dZ4M>MmD()3 z(VCc~sTPN=Z%w9XL~RFUW)t;aX(m5%*qR>Mb2)RFZASYhBfEXn32Ary7Tn&PZ+5(` zE$No>y5ao(P1qg8aWoj9NfV#xPXb-!2DdHx|AWX1g7yJpXx*`&Kxfl;A#fD{dV92& z6cXc$;q))6YsD~Y`RUW*N!21`TyI&3jEpQtTLr1d8o(D|GOX~|nZ@SOPj-X4GsMCm zk0u}s#NQQzHcIP`aJbeTZdYvMWQ?QfGyYQPG8QdzzlX=hqHukpvn^uN_OIe-JGC4JV=08n*S=6D!TSg6MTr$#W4=9gRE`7AjL*G`If$e*kk*5Auzh;XZ1x2v+l+8jY}MG@;!$P+6C+0{>RYJF zmw^~XK~45^h|gRSH?#awv_I|&fkx2ub(mAm4TdhqX=gQ?Q+aU*5J`D~tOw-iMe}iC zKeLNmG>8MDcM~7}#DFX-A9NCxaT)?dV+ApsYP@N%5m-!Abyx6D1^S-~bNu?-B4o`UmwP|(kQ7j6o6eNA{~A;-Z#cC3#o?P@H4pIK-KM;?ro&6g zfBKNVQdC#P21fts&jx~4^3Spx8S6b-s_jO!tIqm#MEjYSd z{5C!~*5jiEZ_1WF-ovA8eR1_4gbaB^PSMztcyTM_F z^WbCeTl@pR`BegM_V-7*0WiAF?Zw~x_t&5P$FKYU*RN0F*WU+ye*C8bMop=XQ#Aa^ z;^)F2P58~lec+#04+U$p`06&kTzWEE^7=}P>+mns7L}s1O}clrJDp4~vqGNd;tQLq z6>K;u`ba5y;4eqw=^1tcd;vG(vGta_72N*3y{VeU2E7qahq!lt8Xi>i{`>G=R?G2X zTJj7}aIs?E9@UBy{kU?~3SYHL8UK7?c4I4Rn-wx9^oxIy+=G{8UKJVZ?!JJjy!U4B zS`Jjj@8uep=|og+l$P~={LWVwSmu9~R)kqh$D-m>`Ilc4RAU#r?iF3UVj~Pi1AaQh zmED)dG5}ZHeciU_*!#=tr3al?XE?5;Og2m2*k&iC&aZ1(CiGJ5?)l`iA@Ip)XxlJh z!^%S#H`row8cR$zek6J9Vfu+@NI(7_)!t9I4}qH?H+2L$ETc!T_;ynXxD)7Q5f3>H!P 
ziO-Eyi%&LM6M#H=`PNfX3nu}%biZyc z{3XWwq1`A#wnJLSefc|020T1GKVoAd+Ry;Cq4p(+>6QL{o9;DUYrjX%PO^vl3?Q}p zF007>tuC|vxOXJ!RO8Y@c_)~#^H@;%DV-f1WQ+$oryXzu%K)jwd3_k|J5B6n;H)HT=6|L=P-?L-VMF+)t7i~hr(~*V-b$f! zRKpHy1M3F6x{hIXlAQSP`Mn|6&?un}F@Px$S5ahQ%?3eN=*1anIVcDS`-Nh3ohvv~ zG3~)8>Gt;amcZRf#R@V9CZ|?h>8m!WQMW4>Z)nSW={DtRY;dyL`;5<>P5Zd=XwX3U zg|d`))?2Sdz@mw=sBhFcoM1Hw(??Jcj=(dCPEgW!p{ArJNPTw}F9}OF8AZ%?!=5G>PfTdO!ZWpVsVCG|R+~d%-%K0s;wji68m5=Hc<6b*!Tmh?YeSqX*M#R@6Z!-%tfQMRDm<}Axah`kM^E{Mt4SGQB#?7%O z##B>0O>1IY_IZAlA!kr6V_pB3??Go{g}Th-qjQZC02c><3Wr>yE(JsdUK?cyv|8>i zbA7r{IJJWlM*t1$y5)>yYXcLzlqOh;AB65{@kS8J~fkx-Q(psX zzK>^P`?i+685wOp$#2oCJ!nlWes9B?ZsRl$^qCNV6VM3{#dcm^FIa8vp={a`TKaj5 zT_Y7^bIkj}^gbf91_ycJ*N+cALS&w}E4cAso$I_1zz_tGGsp~qgv*VRLQpl1XJ!)S zUL+&W&v27D z%LG^aM_TZL{HP~$w|M#e$a3w}yX~N9?rsqa3VxpBGo{LObJ^&HG^a81MweO%t*W>c zdaVm5`*S4Q6{dA9H4|Oo9be91v8CL%MW7W;wP5eJN=oBo1@-&yt6!bV3Ze>L^CPV` zW_@qd(8#$W;GF%#3!uM~t(= ziwb+Grf>I`>pb5%sPF_LE=Uja;h`P=^^0;Ib2+uFOAm-+jF-=$JPZv&q#a4*#G@W~ zcKdMFWb-1|=EWWH1CGRKm6$vaHlpIkz(q|Ay?~=rLLh-u0CK^m$-`d~R`E5s4068k zF+3BJF6vcUV13Qv5Qlh^s-Koj)c)yJngu_fcYW8~&CKj9;tOb%fsL(iv!QKiW^v661nE<^(-OichP55q@AiLrdd(X@hI%cgJterQu@1 z=;VfR{3T+R$P$4ev2WF9yX?HtF zO4Zylls=^q~GUTXBwLSWY8Gw{+a1b7fk zW0DNf+aEEr>fZkeFfCWW;;lDL;KZkcRBEq5p`W!*LON5{ai#bC-65d2IWa1oSrV01G)wMt18uL zOX6zi7%}mj>haNC-8Cy7rSfP^Y@h$Rr$7jl%Rb))54)DI(`Ht4SAjwLm_BSOU3xnx zQ`3?kXd1%TJOSZUqhp&NPjJ}-5(t4u>b(aNGDqZ4?B%WzZp=l+qZ8jJLc8Li%Gjfl z>$4%i`dJrR9!nQR?;3pHz7}-gxhE;nwiP?>dYaTQ3Ev@dIPj_skubGdfQ+{nc0^`Xvh|JM4F{EQX0KFG{kPEjNVNhhQJ&20J?noD( z$}`z&XEGJz*87KtYxhsal$4b%&n+m3(Hda+j<_6sGoGXH3~NH~z*_P~{pCIVjq2Z6i=JzY-f23*LKj;C0a8qFv4X-( ztW7A*t7Sfq54Ef4{`O@R?-&a^vSQtm_03U<`VDP$?v%`l3DxB~BJb;l9Tu9MdM_M| zZ&hCwF!&FeT4$VmcB6>Z4jcd=^aFs=BMn<;&=k^$^#B{JtgPx%FKX$7O*gTp&4^>+n(1l|>nmMz%ihqF_aC+=bTZKGCJBC$_at`%nR2H7D-w z$G@bP8sABY6t{m{KMhB>a?9?zKpoD3PlG2lwn@u;@)~8?8hkb`zVYk3vwQ}|220Lc z?Gw4Vq{(4&={DPDN2qkfY&UfaGD{28|0UfQ@;$>U`B>#%aUGdUEYuJ!T)Nkd* ztE}hPa2bf`45bhhBl9%lDkj{28%EjECj^+1_BCqWKM~hxEpTtH@n`c_`_An8r$6;> 
zR1Gczk45DkxHP;efXoCVJydrL>5`T6TKCHbzO&w+Wm1m?Zi?3Z!b{VUHpis1e4mCy z++OD{Kcp0J4EJy-+v#UTEQiNhR)gXW7U(d1Mwv@>bDvJp{u&`F`(O-Z$2Sd|M% z)sppc@|iQU`pO(fHcX$1_gRHI+N0}JQ2!aeF&Ln*Uj3M?QgCJfKfpmQ} zn^q?KM@Z?W;B*I#PQu}l3iL*6s5FGl@s~7(s0}~`oio!##SYnifl-nr$nYu%OZKM1 zbW{TMbsBV>q)9-J$)CLB)>nx~E14`M?n(PUIzq4^>VV4J!c#W_P4o?7a?j=rT3xx) zI^u)KF`G>Xi9a0_I}wWDoi~M+cGaaqHW*>wwUH8!| zLRva_7ZCqXns`VePsN%$!e>e1aoleZOGOe!lA$F+A@T#vR;&MUHeMB%s0oz~(u>bx ztO($`sO$~jd0kORoJo<2YwTWovFGE9va;YkFBH&fZ^Yezn=uNK2CXX}mO+|O4KNVy zk>l5{9VC3wI`Ll=bUB3MK|E?86nbca6?Ami01R-VhPN4)$10EAcgjHXJ7c7EuLl3j zDRl)(>&cL*W&H|8&uotPj#g!R%9=+5Otp5B^}Q3xnrFX;Mz!F;sdXGUjIPa0iAuyD z>Gt6}mvO%~CNob!YVONT`_bWbzq*U{H1-V-d!)T-IGXR{Yu0c(QbVBoJ9k8vuZ_za zgUZQw7b|bZL^$7>VRLyQ=yzo~V36#)b3rT-&O!J3gn!N4Q3Fnh8dm{8a~km8qQXMr zrbXp|xNt_qWqaPeYk=`kjYwla6T)ITI(A?OykBQ~XD}MCZtn_uijMP?A?72NLjqX< zdAcSZYoIA8?kt;ZYAuJdCL?S#3GhtlYjibe@v&|Ckzxj3U9=rXE?&GCGQ*x6`7bU; zV6_7wMHhdhQQf=NRqJNuPQU!!SnGGHdxD!x>}=fFw>pi{`HGLM#69VR)(Bk>1%e6$ z98mtja}e*ug{V0aFoG1qblZXA*Qw3L(62y%!S75)#-q zSfjVUS0Hg;c%h^wAHk|K>*#6JTEFBTv5&8;=oe?*o*eMmpl9{+BljX!zE-^=AH{5c zd31J@$a3$Cd~!}B=54(@FLgV*(d}pM%YN(F@*w@mE0=FI!`jbtjf~4u_T!Y7=v#V7 zUrP9xEscR{x2q__F~66FiH!02=J9PaV#*>1#v^2vmFbn4 zN_G5}LV>kx8GoAD{ax2W*JxGN`L6KFQ&Vjd2bOAzSw4LB;!Fz7asp? z@b#WO7CEn&{EhG}UTJ2MI%lX_%iGfoIvR?bd^|#Lls$>iT0eO7>cbTRe2N{{jB3=C z3sdU6nnF(XmtAGwoW4)IIC8uB?8;xNoIB`ZbqR2LL~p78BWe7Re*7<-8I`hk3w%^BxSP=wM2TDR38ip#yq&tHx6JJ|w^dP6P|mN^$P9mey@;!8CZC$VXF2_W z?V-ZLQjW{LW_rE7Q8bfD=kx_8qPv36hU+QHNhT~rd}=Ta=xM3)XgxizRaYmk_EaWH zN9iwSYC^EiUgp2uCcY(||MU)t1Xn3VHK{tr$d5iz;xxs&lpEVFGiZOz{JBYI!{Lif z6?3lw7C6L~y_veUwq(LMaON!a*=fPfJA!7mXKZ*v>%uR+5`A#5^n?X_PHvg6!`>-N zMJ|QerVY(yk#P-E3~kSPMCoEP#u!fhfo<9Tv-cvPu&j3Q14F~=*xnVP?bdYx_Z6r5 zwjN?|NmZe8%Ugd~#bw`ZE7h%n8`p>nKc)k2rWCJ~=Ncp_F7}x2*e$AORMW?PR|w~M zFleeX56KzNmtFNSJg7_4b}>EB?pw^S=B>QA%B8fZ{quX;O?#i4QVX$`Y_~NQ z%^58D)D~5Lt90$IwMI#8=Y>8rbH!WqmH*;#d9?hZDQ%Dj-c7gj4>zW! 
zt>;$6G1xgs&GGV`_MeZ&E|d}O#5-jsn!NO5*Kbz(BwDP?BFgFQY**Kevg6}nwJg2< zsO*R@?Jmx{K5GpOVkoF3x$qfQs8|&x)(?+fedd)Df(3H*a__y^)q$(;E~jpKbglZe zmvC$r&2K%s<%wd|-$U;Yjs1@5Ut(k-S@flpJMBdx%g1N4SXwLA3G1uns3PqZSF&*9 z|3cXpF}~i_96MOW)uRU%=?2+3qupN}YbzG$K}T2LP*vl4e#UurMFRjglXuljNwJA3?Ne5y-}pK1}XC-9@XjR)PP*V?-}l~Pf;n6jjG)K=@u^K>bqkfYqixFWTI^Gv~@84`__xd z=WY!*I4OP6Y>+gs^3;@IOLm3>#e-?c~AC} z$Lf1-c)Xr_>#c`Z6!RQytTV+M6_zhQHoZr%%J*3hGjqS3ackLCtnx6noCO0(eBt4V z`W@3pfA4*oC52V1r~31VjLAy8wN6~T;9&?Z+>Qw^GBZmSDGO4p5`2yPSTdnWo>%pL zHvQrGZ!J_YB~3n+l!2QP^Q*SG+4u0y?fm+waV(^qj^i%stPRdJ7ZTIbRlEQL4z+|VmsD=3hD+o3YepIdV9^Js;>NO)-F zp&@3;F^kqC$|-^QF2?IKIKn^nly$SU?R%ZMaGYkVh3u2#`!3_QSI4BSv*1!wSnuBp zSJYSenk~l!Gvamx7WVuX!A|ab_qm9g&l3#xW}*>f<4D=jao^l>C#fy^=nFsp*23K%?aMwn681ph&3C<;CuZtfil09T_KItm z-osZM$$Vpw|7V)#R}G?EPc4<`@8y3`6jOU3?rjQ+q}^xVoo^pBHx$i&tLG-+GGtW0 zDH?tD*GG@--6HCrOi(^bw%1Bp9301O)e;bWC@4#RA^__VG~V98FNKYyNSe72t? zj}l}V9us10d)fKOMzb<23_UVE3rlzJ-~;cZnZr=dK@)w4FeK0+yiWl-eko*1D226L zZ**8oD8Ks+Hl3gmmP6Yrs%5|6$YXJHpx@YWBlNUlO|1+-f7;CsmU{XEY&}e0?l*1x z(h^cO*x?vc5YxfwL>j-Uu zHR_U*mLmJQoqo?uJNmj_I9D8K^!7Xi)%B8<$@g+Ryf%w1vpP~hvE}Nd%h(srnX#{M zXY|g`c)oGDM|xlGSbltZ54H+CpupbdCFSMD$e)2}KNKWbB*=>g zagk+2N(-MXaY2g zyW#+gumPGEEr)?*ywwWf)Vy<7NK1x^#h3o zZ2BrbFk=iY+6%4!sCtQ`} zr+^h1D@IGQ7dQB+Yw^|X`$o+jXEZjo%m*G*eX{}w{zldw?*xz7M-Ke#Hkw&$cR;33 z)bZez$#_o;q2e8{`>VSlu}MIaPQkur5AEzvNInltoCyQ&uw+~Qea+rIYIMhtn`ltd zB{e)T(RJfE+s+h$Ov{7qgX!T5p<|5aQq2rOZZf4Gqr+AZ;V>wIbe~d4u}&x`yuK<@ z&{ZnWzu7VyG#we~V_qBZNb$c{V+)q6j-h#eLBS|QzHNhpcnCAwS8`A?r+XL8g+wJu zcn@g))xX=6kAd&3LQq0H98ROQYhvdBR31j))S_)jR`&5}j=34>C@ugqtS6J-?3s_` z+~8w;cJa_Kza{P|qDIQWfZICb*U!Gt8Q!}&Ej?UHBXzVpx5$y4n7@ZspXy>hDB*0Z z%qFJ6wbxvMJ~(y65Qq65y@;0&x3dSTC$QYM#~gwaa&n^GmgN2ex*`}d#PeZj2OmP| zV+ED(+-KXJ+CHi#vCL6Krp8;P!iX<>t)iQ$OBP1rvGXF`rHOVc&J3 z&+6>fi85_CMR_%C&IxUSk1m7-RFtIecPoE77M|J7TorI?Nd+;6y3^eWb5_YSnd{yf zN-{mq3lb4rOgHfUjc_=$o$$t%t0%Yt?%!O^1PtXe06D!X7fMnvh(5Z21$L{n7?*`X zQ*(Z#qbNVccZ(SFtq+Ib2_j;BqW_0z0Wp8^>5EYz344J{#->T2j1soDjm8KN#8< 
zpHX&6tq8e)1s!?Ew>rr{0ET=FV03$snGQQ&cuHtk*cYhj<$(BLCQcOKU)UEOB9<%m zgaXVq%9>b&TQvGE-)*C*U zxuI>id+$>2yX~#Bd!KVgYS-TNaBvrV9GH-|a{P=8wH;eO-Ium=?ALnk2%=AZ{_df4 zL`_*+&c64iA5OlYMeY#En7y%q&YyjJ;&LQ%9^Jt;S;BoX48_KS>y04Kzkmcf zp`^4P@h#AE3Q;iT@)4F73H?D9fJRSDZRf14L`d=DX6iynLvPXwY!ed>J`m^JeIsKq zAK+x)??lSg-l~u%wX2O=?Ghm`Hq6Z}*Ex*zkQnpM2)` z=%m!cQ8m3)pDibwHG2e?_V;1m4%8xE zm`o&A5^;!@ePhqm-lD}QCgb%VQ^H`w};?EjW zhO_!OL^d1uTv&<-50a2drX&-|xA<7a5#zM8mJD1cA$^t1g=i0J^vA$}0v>hZKRfgO z%615qlQ>RV;%$1(o)Va^v+Et1H{&i=nob+6z5d5-5kj*h##TihN`GC;Rh#Z_M3HoFidv2S@rChwT8&@ONRHAkALr04)x6X#Lj2n z%Im8AiwC77`A{bV-UL$*?*N^O>38ZU(4lz%IVfD`9jE{cn{E37?+pb}@YtdSYi;og zPOgW+JHHJ+_RI^T6W%VE6j~-`j1$?)aOsl|8PZxj@Jt&Y8btBm06Y$; zS6A&$dPgSE0k!c8$T7_OzS8xXTzb@04>*lo5TE+G4kh;`WIq4|tP@>;825#FT$Qj4 zcX*V#@dT0i#VGF!59WZ!WP@dyfd&BSjo|2^WK@?0Rn-9b2;$1FfdQSQe8f{!p07N2 z;FRtN(zbkME|`G>Vt?Umq?Zygci=S7?heu>qe!|k8K)pgCpBb@TGL9CTX>w{Pc~g- zQ|QKQ#c}-T=PN(7gZTh}hhpnGMy@JfuIfS-&;u_836hBT>mGd=LOgvKOh^L*a>H75 z_)9X|ejB{Jffq8+hGyf*F&`ty;7Cy1Z27Vx%?cd^(pLkuJ`6}>(S(~32W>n)?0>uTA!;Ua5KIa(RPze@HU)3zA6uX~A||mVke=({ zIHMPgmkW*~68Cs08E>p^a_BGemGTm{do;$O8b+RxcJ7&*`*jR{Irr>mF2CIkF*ApR zM5H|Tk7Hin(40yoLjq$BijEegsQ?L?=?i~aGYqA|_xYsb7Sz?v%1}5$xDy zb%oexmS0(zDO}*1piqX66xK7uk3aD+i3V9Bj_D+d3O+u6n9Ui@xI{%C!UWyb(^G<# zy16;_#6uYvMA2~C1GgP%6)sFS6duH$4f@Rf>cac?*PtQOy_Sjg*800LWXT)p#in+l#F|WP- z+N=uEaznmJn}x}eU+wSwvAgfPxPE(G$=gS=dhFwLUlKQk4%b@jbF@%CD0EEdkwIK6 z#NxXYWT=oM{v0W?E-2jPu`G94^^Rv_IT_a#xVBmzEd9y7%66gCU^bn5ojf(A;0_h{ zx|j5H7+m=Uc{k}`qw`wgYktWH_Z^-+hP7)u`RjF6orcENI(It3ECpEvh28g$S0`Ac zgEfyt**}!MWWI3wdRK`FYxU!Di-i~F!Uo6Mj&!l#C9Xhl)mENvJUyE?tI_6S&&d^> zdbT&gaH-ezfSeOsw?>}38XOk38H(69D4#fX-sHf6`4yL%^P+R+E1Y`r=4KG_Q=q<4 z-?}CK9I2kKKwm|`3)=Ak%%J6tZ5{$CqV00m%89|&4|uAi?d-h%@EGF2ZOsf}sB(Ctujz z;&b|Frb-UqqLGGbi8%YQV84=I=%3nb7KPuH1RD@*bsrQL6bypo64jbh=9CL)JW~>7 zkA0Jmxa;)KPuf;#07Rg=6MAk2d*_G>)h-GYsdh7e-QC%r&m}KGfs9k*T>HO$i>FV2 zHW%!_^wL1jx3;|&$5+q=V`%C??*jx{DQG(M*8CZ;N#zg8K-qQQ#Bda)olPk8x$_nt 
z89+s~R$w-go)!v%%BhSq(VPNfwq2k0ugu;ha$mxyhnJMKhd%7A(&Du~y9}jzqY7Vh z%0qY0)PIUZW%;>zhiF^xLHvA-@dyGBzOM1C2ea*d@~bp=&xhV_yVlXsv3Aep-LtPnpb|{U z?xY9cviH|HO}mIzONVK3<}gloWK_!L?$yR`&ZQkZG&E+>llzWj@tvbhq&Mxe)%}zB zXLHZY2!!0npKGhG;^F2V%5xD<(5gN`AWHal$G7;8SF`viu)R&)ZozH#<}(ZX+Ky(5 z8qSGbtP^3H=lloiCJ)tyFmZ*uDvCPXh>Oe6%i63)l^}mI ze!jooJa#4{J)PoszRwt3QR^5O4m&xCVAKBsZ!l-Wcx_EhWz&3bSnDXK2qZRDX#+4; zQ4~kyN$6Bh;N*-L4TaGj%$;@clEg88Ny;Zdw5BsM=WVEtAvM15=hvC7)M=hW^UK)L z6Q^}prbOjniGWqxY*J6arPPm&&Kh+-NUa)8DBhfwK#iTWl(F6I8`o{`WfHvAR(t>c zeU)pBSmy`LwnfD?Ntv3O#+P`@Zbv~pihub-pPuwR1w7ZS_dan^^QTxLdH%jAN|OvY zbQQ8WUbP7f4GF|YqMCc)i|*U&&cm`*XWd#J%Jtu~yR@@g_>j0j!@K@{@#SrP$Afv4 zP?f&u$fcK8o@=##$N1j+Kj8_&%u6Q{62W0y={TEny#KKN?(G5snF?2z3K!BRsr%iF zc+{b4O%+$O$x341l&}9Dj^4H>#~|_Pt*h|)(xCfh*-$~%5SNBp={ON=;Q#@=l9kv5 zQ6ZBIspJYBlZb>zPs2RVH>Q*al1i$rk@YJZ)ubTZUu&N--bRT5af?eh?L>5!Zu+>yXTl^zG?S2^{G-f z;~yVcT$Lm~tPL%`m+W~p^l@oFkQ@iWe#)0#-ovzsWv^z3X2Mx)+BlCzj;u0dShI#A z;LO_BAqmrNy7fv!B4$Ug=xNdRkt^M^+dA3od!y&V1NF!lp% zp3V}^tPAXSt&t-4+F6QO_P59hKp=IQzpSg%P#{vfV) zEG~~MBwPT0B&aAi6QU8({Uf#9J2^A+*}#WsW=?ALp|?wL2oQcaoO5s**WHKn3@ zGWA8AEOj9OkxQ`7X<~a8u&qdbiR=!U!6bnS{3T)~fT!m?vV#`Tu@t$ug5JG(LxtT1 zm9EkcA4Gpdre(~ZPWDuqYBCornMagEfsV=0SlHuks*OUHE|rtz?d@ApWySspDx$sC zE`8KJCG|avYbE%wpwDAo04bK<*(wo%xOFWwDM&NDD~z}%?#t9ZJ<$j{1$X{fjqp|l z&c2H#Ok1nOQpD7_U`Ah2gj+AK^WsbS1nl(78O6`o!JogVqv(Cp4>tD{pUcRYIlrXYFUed!eMBQYFo~Yg zZk*@v%neIBdYdPw?%%w$vh}AOSFTlvkR>wp}FGbgK7QKH&yLwtksH;bpZ@%rI-rwLmb-kqKC-eN1ZO+`N zG34RjhT=VEc1?7&7*0e_qooUDx@ZD(fY+32e8pq#wpa-IvNzE$YeJdw#Dx#++2IFF z3QXS6P=p>k=>9rt2TCc+l>6mS@i_~W(&s{V_nQS;dzoQ1((V0`8`+(|Mwlg8gIncq zi=5dUN-d;{gLpd#OQ#kVse;)N`TZzs@MfJ zg<7Q*JEM2?h4Toa7I9$(gcioFd#Cg~F#r@#oK9AUd{-VZH!^8XsFDdMmAoBEsuNdN zS63s-A>gsz1}e`F{71fw_*EXCb_ei<0$7omFpH21qho3wkTvJNorl;t&F}-Wi>oWD zPx5K5MCFYLn#{FEn*H9U^`T{M{83TDYPs^EL_>P|v%*cOc2Un!(|K3iQHi?ol;-(nJpic|ZcnVg8QqZ5c^mRS=&C#FC{!|%`&Sm5Sz@T1SB zCZ~@@PsXZ#74UBwnh$wXal6*hP={H2?Y>MSI$MVLJf~CJiIr<94UH{w9F}HXRmvgI 
zOTdL$eT7sWKv}&Q^ci!30GJL0=@@i1Dy)gh74!jW%GPV(p`edh-;+bFD@zPF0h}Tb?xye!=zNmn=_!jE8!V zb%cn!ijW2t5;Gffj1&?^zPgUoNl?Z83LNU|fx2t!NPvMS8EN3tx93Ok393-P-5_GO z8!ouj6!__bk!j;#C>?ET_xPy|UwW`v= z{0eQ$95}1pLLo4}3l|r@Z?|2eCtfpv6p(Pnh9MAITTeGKGD51AsP)DoGm`AZ2If+D zJaKn#|16^Wk~ccK$Ch=VqNPu4=kSP}u_yH$s@|d4jSi&-!~3QD?H#pC{~>iCHhWF<&`FMq; z1pOSMgfGgFbUoWw8yj5o6MFXfY@`125395ob7Y$;0cYs(H-37TVIb^?@1;Z3n|jPet^U9%DZ+5ZDm1Fd!!4aP!;wHO$QK zfDJ(PAC$oZ*qstz4@p%!6LdJphq4fCW_NfjM*}7nWOrf%RIz~b`mc5pv8Gn5G7zt+`~>Jw(#Pp>UhPRA6`0;ay=ulD*zF>t?Ce+b4Fmz-so+fD)owwMq8bOjl^>L42{0dM1X@V+1 z&!-cQ*(LKrJd4EUrX)VK6M0!T7UAAn+i+s&CEG9*=mhO+=+0Kzi5e;fLyD7TGNv9%o!kegQ12`$0`FN3U{5cg3D7KgU{0F0y9;okC9 ziM_$|i)@>npFaSqEJB<@m;Ql>eFEG4pG8!wc_6qTWn6HPP(uHP<&$(K$B&?CiN)is zy=87MJE3Qg(R>)gTx1xma|6*8YWG}lGe5@DfNS~AWGet@6t`Rpr2G8d?&Tt~By|11 zbqV*&m8L3-o|vnE{Bb9H`R(Pe^DlG`shb5UGsmy8WT%?bnyyKW;V$9Zx3?+ql}C8* zTVs*bw=M@;U*EAW&C3!GY1`+rp0!&EBL3_R0x3uPLY>yh?oJ;20E5y64AVsIX#dG~ zl^;IbM%;}mi3B;2crbPM+|2Fc(HM6;mN%BOLp(nvB=KbjZy+5TW5~q(iU`M9xuAs+ zu8W1!p-4f=?nNAMm(O2he_rTyuR7duVZh<@Gs{CTVciMN5(+nXoHP9G;dXX0yqcE%d40kqt1CBwLa3b z$CRvXn8$E+Da&yMh4VPGcfE)#*mvMSTKNF=Z+a$VFXd>^kT&=Q^u>qW_k&8P4HMQdnmoJ~P5juDqb}+e;vB*NoFF_Xyr7qr)Z7WI zj+R?TH1q*`PGJnPII5a&&}$`SVX|UyPl6Kj2^9i$#!>KRiOvVmGu$L2%t0pYCm7rH z(*lzG+fYA-|M3;z>Q`T1Ibw2S^;W`6BU`X$uH7Dpsf*18Yca5rP?OLfl}B5(Y*I#5 z5=kD``8xEOjWM(xYmX+e{RUAQBy7?*gyebNkcLJ z=37_JW;yPz(B;P7=HCYw6&bhhi~L%e^?qvYH?wc|mT%&n_n=9V2z@iVrT6WY_iy{6 zH_Bbz614UGxv8srkGLwFX|0UZADA`i)Dz`2z4<<8L;tvoZ%_vH+DFg$yNE~ppWdpZ zzHL4_4!C4#_}4kTogWh8>~u{GS)!gXHl^siyz;tir9d*H)(P|>e_M*8zKc#XyRT{>!-TXv_o?*VN+>L<~8U1_Y&n#v9(%7s(a_tpug1 zTd6_3)*m6PSTf}il}m?nnh$J2;G%Gs-@U~CWh+}+zKDG38NMk?Kc&>mkdUUmIp(}+<9nKi!YwZsYjzvPH4NhNvZ&i7N3=+vC!-zcxMLn``d8uRDIkN}PBrEi8e&x=LKJ4N^( zOR0L*N;`0^<@={{A4oLqEWh3i&Y4iSe5+38sxG_Cv1PWWdMKbAUyD#6a>B^Z%znOQ zEcc0Ul4z*Z6)#_?(bu=NMMBb9-(toPPfx!y;1wot67MRGNo*&KNCZ&|wT*C_HoCMSZmwuM@s{kcgwBV8Niwxz}Ph z=^&MVz_Iz%K_OF z;*opE_K>d~!S;-@&N=Y)5H@syxBzjM$)}`{t}vXVq}~MLSK14dnoEeI2#9z>(1hCr 
zb7g!`Cped>Mf&t)jo23+jpwj@IUq=W5omne&!5(41D|PK>3fcMg#Op)<-vG*+MIsb za{JtGvpM`@yf%lkSq^+zG!58OIVi|nGjLZ?QD>UaI?7qr(E46rfGYc`D+R3G-mM{( zt6%aRPj*QkKgz1D6xMOKeO=Du%&lyXXM5N8T?Mh(DG#;&aq;`xC{gv#4*eGlgq|&q z-dDDllzZIgN4FA$1~y@Q>(@wpaNXeWa$-f?4qN<(x=)~Mklqf!M#fk;useMj8d8Gd zCI>Q$4MLWi+5JDoF~g9sXHRq4w_j&U)~;~&8{OkR2VzN&OoMgmsX{T6Z&sg{#>nog zWq*I|n3JdV!mh*EB+t#?vztHoP3&N>McO*tRfu+cv6h?7q(q%gb0015Q8Rf^-IAcyqA3{YpJ|9^| z32zcwKl-EboU~pwdznlV)uR3=EBW<0nj=3Lq!#-s3UjGHq$u8^dPP7o*B=e-&GoQh?>Alg6v5r;X+u&M=n=kF^ zhJM)mUe)&G0Q-)rcdh3iow{(6;?-@%sEA+LKZdw`r^_oYbkxvp-GBGz_5&Zv9v3bT zHai$E;Ur3N2* zIt&Cq?kc|xvH5=5$;YnV9D||!jJ_(@e`Oy?F_U`tmjCRVw9S76&wCX|lmtbS!~!bH zq#BnxKFrQd-U>ye33G?;+!mJj60)XBnDnpzeE&_YsX*4^soeTSjKiYdJf03X8`!g(@s3_iF+zs*~AYW+%@T!G=I+Uu-ToSB}Tcc2-C+E8d$ zd+%^*BXxOGSXoj&Gdh56Ze80t1 |wcB z4eg{o8*C>jWWO1GEdA(om)6k)RX8_JhEr{T8?kWh=z~HwaxmSuu$h-K!v%&X=2cbe z`}=}!E3Af;nz#9w4ra>EMM1#6iEJ`K|4!$k*#X=jLV~{r1~|+!Hcp{yM1eZ3rCe!F z@2JWsyo~58A4BxR?q6Ep>dVO-slR8%*06*t);#qpQJ1u6LY1i3z|!y`R4f!$km z*PNMdAN8WQ$YZl%OAS15ZK6KgY9vOx1bI zxvXa4`&x{WyOXZmzi+b7K~#q7`O1;qNp5hIDLi0Vna8Nkz|b%%xM>4(cNJU)6HmXU zoi!G@?O=RA=!Z*k$d;Rq{jv(@`}%gKTo@p`$m30peM92DoG`uAIsWI`5oKlc)5JLI68=TkAt8Qo85|i^jj~l-SRDlSW9i3%u$hht+QcoOW@%Tl6-HPXD%9Ua`#L=%6S+X^znbh z{<|i^eI6A>6gu#sC~%r#aueNfiOuBCN7A>yt=M?byIRO+|EK^`#_z!Tp(O=%2~`X$ z#;;3|BLp+$Uy~BDIX7Y=$T2x~@AlrWlgooN*D|M@86d;Trol$lhIPCu4u6>}Olf{Q zJmM`K)i=Q!lO`Y;vhSf#`B4; zTc6xEpU7I2U^&EG+>uz+J3Mo0AFRmbW0|F6w|6InTHBoBzLOBTO=UkZIE>C8Pqf=4 zH+<}}QA_>K&Ek?zj2OfwOLus@d8mGPXx_$RS&#o$wfH>!>r%^B)qvmQq|@|}T4JGN z>;Y&o)~)xypnzK|n6O3ahOXSXSvmsGs~b3VdZt9AV8*6;%1KTUBa^*hK%f8$kerxM zrJP5yLP|(^P2(@{oPmn{_eum%>l`>HJ@qciKBfH?DIy0gUoNkSp?6c*Zgfr*0?xoFMSxJPg+w)k822Vx>)N8 z-H@5$dmg@3zaSxCp9(X!j|24#GP1WMDAm@>;tnnQ#^*uPV@Bs$veo4}x}Xtm`n;2e z#;$a2vdaY}Vek93P;)zS#3F*?Dyv@|U$ zx=N_gd7z0#jFW?#L-70SLFK)>v6nqF%pDKp-#Q6mJX8HNtgz@l>dNVt2+@nW6uRs| z@5aGm>Ms%4OX(F zuIv3M=4ai$&8hfId4EVGvsu^wT`7Ly?JDDyVq|K@aXmf57TXW|2wohcO(3n2(r`G| 
zM@NDuJbS*mazFvV!a4{L@@Clzvhpy-)yG^K|X!sw70$-s^}BW~1|P%N3bfYqO<3gc&P^;Ih@X>OZ}Qv4AQRp<3lCo_`!PJsz{8|r#^ zQEmC&mdJqydy;P2t!0C`EFdH$ca>0njF&gQq01NM*RN?&T#u z6l#Lz(*q{9i2C{ZzUpI2yuir%E;y}yZR3jyM@LsCrgiJ6iqK3r)65VrgzPYW15TMm zM=?yZxo=d?_N2Zg#v9^crigleGdW|}j&DtC9MC{8pWKUzxSSimJKY=Q4yMl1=rv00 zmsWqnh669QF$_|%B}U!N4`wt(E-oC8)%nYQlKALBojkT)gA{r;)p3DaVFlM;zgky9 z@pE*tr&~zx;&1X$fx$AOeV|jU$N9Xx7o@F1(*aen;v`0#w)ak;MtdCrP(-Sw+?K^A%aecTl$qKuir*nXfKAeZ zE=RF}O@hUJHQNDwB8ad>zBS;1?bW1|n!&g4n~9*Uy{LPNWlz;r|0(k2hTAuUmD}sK z8fmDorY0$Tt*Dnf^QkNt!R}4E(YkbD6)UuplG3yFG%~4tVNW9-p~wfeJg!5P_drH*9d!o!Mm{41?&REuK((ur<|*qX_}GJEP=# zedBYiOz*d@>BPKOr)52k9|5!2i%K0FHsY@x6 zuW|@^73LMG*clGM27w;qta|}u%!6Bx$MBl2caY#7mX-HuR0@YYb zT3!C(i?y{GpeI~!JbzY_OYiaAMPXkHgL*n4cuspRKdtW_MwXYCDHyWhhkwp<&(*~J z+*Y^A|D?oZ%piIA;uC*=ry|XpTU%ni2+RM4<@{}ES90tmC62BCV)rI>(YWqV!wf=6aul&AmH+r&0+f7 zpCleu;i1#JS)p`$34K9CBmcM%UsAZlp-{ECxA%VIA1a1(@V$gLpKuUwv=gM+mL4^O zNPq<8J3b%F`T?7bhq4u$gKpjZVHWh>iaLqkJjo81>EC{&c52K8<{@sPQI z8+|UEF&)Tf#ths{B_ci@rG(Do^ zA8^dGKBXU*!`Q-Q{?(E`M&cl^N*%u!NQ~3O0{WbCYiYz6rSB;eEx?Ecc(|JMCGipu}T|p_PMiG?P6;$WlQeq_+s?v zVjKsop$I0VW0jTi%6ke5@JydrnctEfBTr=s3;_HHqYlv-c6#iN>drR}2Ef1&P7?N$ zdKBx6)0a>y)K|777Iy7W!-O^*W!KZ`=vTc))7ffL*Y=|A@YskQxO4kAT_!_wHn+H6 zVObb*jBpv;aOd4?Ziat%wu7faQLWpCO1=aE-JrG}1GT_CSExrxN0@@tg)cif1?99p zf6k95;2j!otBX)9q@$}Y_o;JPOk;k-1PELAr0oV?9HzdZNkv9Iy?SfCMXd+_LB&0= zb=17MYqRY>!50zl?NudV&Tr`b`Z4EM&ZgeA>g&Zx3l&aAqeIo4J9Ze8tdDN=vHSy; z)sri}#s1Fv?!6U*#r9j%uXFO8dggMwa(y}QtfLB+dc;yC5gT*Ko;%8yWUgIB=Q15V z2{Hm@fA%lTBs!FxZR}vldXWo+A+ebecgN1CV<8h|Kgt&_u@bgB?=H`AvroQ|X)sCA z*w$KP1x$q!c?r7LWU%OCNc6@4l!c-XjlTD2BEQGP9b$M!7d__1FboL?&|ss$pg>{U zue4}h?7$#3cX#)vN|BXl>}xe<9s8R1PX+n|iV>L3p=vUN(0NBsbC`um_o}~Pa@o1z zTisc?SCBHzm(>5DJ~lJ0(Gs#t{h7W7A-BYo(}V06QW#Q2dltvl@=H%g-wfVS;TlD9 zL0m0Xvk~u$ApU~+e38F0@4HbE=+bXZtCkcC+0k~SNX=9YNhfw~> zIz3)KGp!A**E6l;7O|(+M;0ob*+1gkZgZd6BABFIg~+9=d*DRRgA0~Sx?$6&~YD(bQz%|^DYo}Vf2elwKNN{T3AAyULKpke~`G9DbeHtQkV@%BQ9_# z=`BGMK<)0nP?j&eqh9i#x}Sz8gQ}-2f>u+1DC2~iRk-Tkft~UoS0w&OkKnKa+wXp( 
zH{E1(Na5*qlDIO4P1MdmFAS|29AbS7d@niJQOrQpa=PCoi!FQz>Y}GnqR5N+4%YZk zRHaub-Bjg33F}iv^K$vP7){b1TK~$=D(a*rn|Q=% zyg>+!G5Hb1C}s7>Kfl3?O$ z<3(nuU{ZJ7^^dtxqOh5glkWQTclBK0<=i7IWl$0|kliG)DyEy1N~K@&%9if2cA6aw zaM9Hce5-Bd2f&9H1K2Si!H#i`mZ>g$9IY#Q=+458l zBW3#&LmV{DY%G28`F)wh_=j^{{wFx9n#UDryy=Ix;aZZ$1>NXQWu*{=8@B~kTldV% zdrUP*A5Ice)fD)GoaS@?E}shSXQN#a3T`d_azMYlc|5zm=~Luc>b_{2kFm;sC=2`4 zno?wS9xcKx7P;MkEP+URx7Cs(wP=ubO&BZt?%CAV(ieR9M8{{&B>IO0aeme3{dk}b zfD6wh*DLRe4GV6n`XK=54rf`#`A2&4defO>ySCS&ELnT{8DD-~+2La~`|>jOdrmgQ zeth}+?5{50B=iJTATv0~$5lWSDIZj8ej#fS``@r5UJgM&NItys*J=A>mMyFN9OT+J z>tVb1W(VhXarSTRj8|XE`Saq2&A_G7H33RYiW$|Xtrdh4V>82-c5q5GyJ*#i)#uZ2 zIB#!bhl$1bLV(1HIRWEc^~CM(aAuKis0*4%ozdS68}MVXNhbK|Dd%-ce%o;Ms=0jc zjG_d4v#nbqLmfW~iP%>$KG>LleRg!bP%!^EyFly}#A`gzpU)_L6Fnq;;N_JC3z52OR-GLc$MbdUu2jror+cCdG`5zWtOn( z+VuUr6m5etNz|V5M^x3<9b&>hY_ISr_$E+HEp@_IEI39;7edLI_x&D2y68V#7fP|e zxGp=ciNUz95=pK)q0Xno{O5Z6HdUxt-|Jo;w$HHRyxxmqMc4Ra9Y#y_9N5GMWn9W* zX0ukz2;by5qU>`vXt09B2+fLLp=sV% z^-iO?=9j7V#_%JDNS&N1dG4o;M*Y%wEhHmrY2HW}zQy1@nI~@R1H9n(s5gJ%&MI}# z)#cX3|K1ZD%x_Y>SFbqHTTfldu>Mz1VR5k3`&18(QM2V%8Bvw6^V zG83)`P(n6<|9%m~Z>sa-9hfGLBeymhu@T~Z>`KAbwCov2&NI{3XxzwtxTt&1r1_@U zi*VVmcia8fuRpCR5Opff>zhRMahzQ85WB*LM&=4(@FB*2XgI`w0%NGs3xh~0@PEcx zMuh(cnvz)$N26VmUcBa!JQ8%*JTd&qr?A)EE5feRrYlh|;|;$~pT1Afe0=Azv%CRG zG95a{i?7!Dpjx?k#-&0guUtmqU8rN^oAdlU03m_YqLzKvH=NR}{|7oK!g{!om$Nkp zQs(Ul)oD_0oqi&$=u?r_^aq}$C16cRH-95#f^v^yn(A9511SAsYAOXV6(?h^*M`l% zbz(>?em$J;m8O-H9nyNCE@6+1e`#+TliDXU)oy`ed)yd|YrbdmeX+P+Z~kwiE@tsk z;f_SqD?W|kp|v*ZzIyKJRTt+k2aGps*eU4V9KXkRW+XHLW%Donfiz*NY(MRV0%$i~ zez0P^^Xg4}Awp|6oXIG29R!H#PG6E&9(-%+dnYE6)69CzZz^((^+ z8Y!hhg7|+5ZnCL0E<6JRQ`O1~Z}|$@1GoZs9>uJ#T6H0sj_fk0@RCyv#fMuQPfEE- z1P}`zm@u}}L&CRL;vqWU+onKpSb)PMqOkK`qqbZ{a5}nf2RNM2=)?4*0ILc#wkd-u zcl}OcrpX4gfSbzq~^>x43s0jX{WwJXg=lMj8L6^1LKep1DI@F{idg1)+Zd z;KW)C2q$BgQzi5^R&~F8n@O>Ai|8wNtI+p1rG~EyuI?!nuH_M;***6`&Qa~_##X5& zwZ`#c`xP{;b_ZGhsDNDYn z-L*2TUxH2HY%&Lz)JBS-+F-7O$JKd&84pRhZzmC7cWjo=qc@rliRJfp*ilm 
zz@2xw&Qj7}?f$Bh+WV7k6wM+J+E23O>PYf%0dhbxE+OE<8`GAhk7J5-#TbhtGc$!Qs(##s<6G<*53r%Pb~}$Snd>>+ZkcGX5-qj6H$Pr)=qe*1`~=nSs43w!lc}WX_`pCJ9}p3FRx<~5$XCsQ z;vCGs;Sd$As8mWWBtiBHc0H}C817T>Uvr~TT^u&0tk;T zp!>;BcydCQR&=Dk6(cRlu80-s@^np>cd85xT=u6X^YTXD-HK?lR`C6kQ0ILDcmE!| zLPoX_f8KBIAv3NgJ`u3Oh;qSmk+-~>f3$dsvHV8Hb|Y@}@FI03+okMmzan<=bhtK& zMVSX9I&v5d}ezPIcY?+Ci_qa^Qn5DH9S7SU(gmI zGqYy3%+V=6o|@Hq4r(XjeEwk{{;7%n?u?$m2K7zzMy84dV8^`QGpN*Rqc&F7RwzHN zDeAqt#ERn!rQT#G(~7(b88HQ|Pp5D>jwX0zHNHnf3Oj)gu`E!7Ii1P=Hz8^m+J#uRzCsVP?|zFFF)7$my; zQdpz+Lph<}b7(5F-nkjE+b!>e@+|$EtRODy)WBJ0YDIOsXH6Z!uBTy7z0HG8P$oi9 zQHj^H<@N10!d9SB+Z_3K12pA-L518G>lLQ6Ogkz$8u*KAE3TZZrzKbQsOc6IWm3P1 z6Z9p}W`Bw@UNvDFYN8S7(yZL$lD*w?up*BXx+9szA297r8OPfRU;DcyyqNN+8L!{| zr6UcRq@zn8#TSOOmQDp)#!5fhV3mxVf!ALFFEI4!h~ zEr`S1B70$!lQc)ZgkNJ@#8jjy`$r7p1apX?jvxVZe6i#(^0i>tS& zfzPuG8X`H&9A*--cQM;$N9WOTnF`j641B3+Lv?yBvI(@MV&&X>F{N{=C zsUgn(K=L4Z*YUf-oi((+XnYIR=a2^k7t-qY>k<+&F zOZ{iOY6OzT9vxOMxSQi1bFn#(t%Y}zr`=qMjV<-jMrW?>7w*hLtzX=IM~B)pg>O`}(pcFY519p_WousB;k49g_`W5sC8+Ilj4fxmB_zUkBpZGgkIl2%e-YsG!DZe#h%SYNLmaL?< z?Oo^0aCK1Z-8)&N%YBW=*%;$967E$|ZMdXK5poo{VBb5GS&o z-``@fVDN`|ezZ?SNr>&#{?&*!2+p=!Q>IslYSZ7~PmIeVg_j-0z`|l=qqT`IMI9rpb)j`2$tEJThNsrtEeyb7nY0Kc0JaTT|z)9j2+9Xg$gb zdC&jAdTKRJJg-J24y&v#7 zaAkJ!jujou8=r^rdOvDzO(Y6rGIKV%kot#os%N*!y<{~fqJ-L4+`b~;?(LQEz;UdhNR;y?otCWR z8)IeaTHfcLZ{59fTf8;pdh6ROC@p;t7wzMvB`-(Ok$lPGNN$3=sLs?Iv3)Ji<@tgw zYg@XTGZWceiqiDuwk9{n`+OQuwUoJ8Ih5a%Ku?-E6Ym0?)v8io&S2lyKGIdKQv

5- zPX2%H(K)IOOBX+&H5c2iIQ-c6mNsFnQ@Y6-L6)`ssa($;k6HMaxWA0ia-V*r>f`qO zbnF(t7=72~zRxKZj9(Mz7;xjS|FhO4`?oEwBMOSNC&recZf@tUT8gCPe@jiSyOa8& z_gbc?DvS8f&wWkL=7$F^&2=>o^tB19v5Z$#Qy+V-=CwDd@x*W;&4nr+H`7BQ24rph&Eyy&%*VP-+&nPl9f1T)23`)3YE>|fBg zeBHw8afEU${qm?QxToGL&#!^{=g(q^IJ@o9x?JPF>w_UpxryqjuBW0l=vma>O7Fwd z(Rwm*Ph_A;nX#crirJ=>qUp(s1A#GP0cYPFz+-AxrTuVhqO6Qr&s|FT?!4LIc<}|x z*mn_aavNsutV_ig)mN6GAh(_TTD$ts?fZqUctH1X8M67 zIw8l|1&hz|3(e0?S=3$=$#S@NW85N9swt*-P)MjZTy0-4!^HSR+40uVPwaE^<&`$> z%a_s9$*vDzxUzUHaXsH2`Exy6XqQq>AC6ry_vq9OH{m!*tEA5Ryf(_6uvb4*(%Zn7m#jsU>|0-8Ejy`BubvCvzX)q(S!5aQYkhk9!;ARy$9|;TQ?|lWDH3BG#9@$A#I*K4&u%t~ zo1TC6E(u=ETsQyNt|d2(0I%OD?vf40eKIr{F|o0U&z>p6sdpD5dB)+(hK7c@1239Y zH8nr;e1a1v)BcD05XWlr{??D3n^osZ=myY;%&!wDd1ywJ)c&H`-{wrzeG2}o((B0{ ziFLgsYnGFZaplY6+5=aQj>6yb9p;HsZf*sY2fJ2;yU$ORy?_4^uAnFIYM8<)`pzIQ zD5&rHI>nE$?mY%(A9x=HVPWCWgEo&9#fFbsG^Q7KDlI;lKednZJUY9rqf}G$lGmXP zB}|r$6#ZU*-nqLurGHh#D!QF3BtFpM`TJ>irpU9i^IWoCp{`#Y7?~LTS)Gvv6F@xP znUjs}Ni}Ny^VviYrsF<0j>#Y4Dk@synY`!+xWYz2t`k8^-RPO^WGVaz>|X-NXcypW zj;Po|u1JUOLsct!x)9d8UGy5V6cewG>|WfB&uIvD1hpGpSM=05v}M!JTD(mP%zAOK zl!##dja`dPNj!ev*PN9)5AIE~f5XAv&q)LEs*qPl_;jjThw6wnAT=vh?_#; z4}{!l3`P+lP9E5)bwTd>2+4OMzMj&zmE^N{uWW68>cp!kJg;^UnaX~34syxb4dSW0fL$Vyf*&2SNcN_!vziAufx(XgMFCks)N|!rPN|c@|EY?Q$ZRmiTbi zZLSLnw=qjU@(CztmWEo8hxk1b?)0br7 zA@Rj{z*`u{oCtbs&y~cLvNbRdq+*36z(wbTF(~61k#kA)slslvm-p%BX$A%c0tA}+ z9Me+l2b$F20@B#cXgWNF@d~~>?V&|49cqv$z^o_rDkkz_)`%iDAzLwaNRv8)GZWXu zJUy1IHtv?d)@AAzK~Z2*&!bdAy>$+>qf#WXl=nz}Ww_{%JA9QN zTXHTQa;$Kl?b_-D7SpDJ{d@P;0Wg=0&${uDJCIa<(dFe&9iBskE9b|_$$94;bn<%s zKHPc*XY!rKRZL9AIC5u|b#(`w=)KwU^w!X|aeFTvVmazDxqQc~8R{9)C-RzG9ZuBG zu6)VnHn^u%#oj2mwQ|8-*TN$CC%c-AS!|jYDkf~n2RI6>nIomt<#6& zzF2Ew2MHCjiXgXXyVQ;E4uvz}#;J>EoUB5Wq-3G^_EWKTC+>cCjJ1=mPxYx%{rG0V z{Rf^ONtMOh^5;AzFa4u7)`v_t#r}{0x=N|Rad~;@FP$%tY%Gu-xGkW z!?`dC3D|vC-a@7XyXs!L^kSz8*c&I@3e(e7`~v*_4fuV44-%TXJQuhwbf$gRr?(%( z7&cul+_v_=mttZvcJvK73=U0pt6RjynM`D-2=B@5U*?!Q>y>lhl+1;SdWM}T_pj`H z``;*3i%iwzsKcU66j9OlAtV|8yFw>!HlO%_fGc;^X6U`_n%#uJRJVdSUh% 
zYRrh;2|t}ZKIZ)J>(@ty*y+fk@!k`4R#Ic4s#ZRjrSHqdm*1aDrQ2Pwl+{W15P9qx zswlcsesyHCG`Eg!9M?=wvBHK|%g-Rd^!!hN`DmOpne*ZKbiz0ueq0-S$VBwFyJP++ z=bk+$4{rVh#$zl&zBGyTIfw=Z1U$do4`%32IRH&f{M%FA^Wgeotys0VjuZbIW2jP$UmoZcwmmY3@=l!6@!0~;4=3g^eUnOIm7;pis+i#+qx*CrGl ziMPuy8e3jcu|bt^&t~dthXGb|=G?#Eax6a~ffK+c`6y*sXgebUa~2PQjPOb z;ptYb5*xh(JMI&7d6T|4V=9jdV4LF)+h?CvU(B`ZeGKE)k^1`jTsTbBPL0mx9~sK- zT7^sX9h-nU7MQkxF}XYx=NPig|2KYq7K^XjSN+7Rr^^kG>_!380JN_8tve($t=e_5 zscJpH;0y6?9LSOd)8#}ovgiZ z)AiG6sTPm=_uJNp;#a;|YQFkaNlCaouZY<9MdLp|{mQ`~fNXWI&wjP=*EMs44u!YB z3rkB&V{0;}m`pPOj*i-yobaC+n7kG&;f;_kiIDF=NIZxg(Kc-sNr z!#f}1;%pu>Tl|4F%^oQn6qh}DDc5Ym*TF9meQg70fSic-w@X}c`^dJon(udtuuN+E zVvzRtk88f)2HRk4M1sWZm(}}NCrc#ePsWB{3jII*g})-Zjmy2>L~N#7`~>|ZyE*(P z;@tZ?;;e3brhg67nsk&Ff96VbaHkWcTPTmd2jLn(^e$UmTnuw*u!zH861mk4h4Yzi zzus&WoXU$o72Ag!+JGP;ASmdmzoLj7T#>Iqz67$Er6(15yg;g2gX-<;414S7N(T);baG_?ZLIkr6;29Q0!#> zD`tFuws-o3$}%OdrAfs2hqQR-o;9==K&J_naCgfn@p_|gc>er%1Z2Zzw-ztiqYZLu zvJb#71hNEA&>a~>bHBQBVJxT$WoQHc-aYrt@x06Ce~v$I!mGVPNizfn9i zt_p|!aVT#Sz;c+KKY4b;Kc|o7l$y-$jhAj)Y6+Cr+AP^L2s9nmq30X@yIVxnv2#4% z505Up&k1??Q}AYzqsP9trno0$ae-W!+oiVsOTZ~blaXO;u%`i+_z1#m=K0dKxN>?m z5!zz1HK1tJE6d!EK75S^!*MNZNc;=i*blI-vLqaT-S`vO7L#M_Go!wRV{^KvXqia^oiz zi4C#DK2&)sQ!E>KdFNyNv&2{fDmTqc)4=&(B@($2G>cc5=jissH6&CA(KH?#=1Pqt znh3!X5)$g7q?RGrnY+@n*||T>FB+T6w1~#%H;90PtPsfX-~3P5x%g@nXWijYy5WQ| zWDT9gx7ALcjt9KpI6#f+{Bp_0ITGUXPuKB=#UA#lI5b;*=E??^%s{ zO(J5mv`NJUxnxLXnZwEUb7p^MBT&7m;Kwh^ z;r~b5o5$s}w(r9sNo6QRDvgL#q)4PBDWsA_b0`fmHArbv5sIX+lLpPD6seR-iO7(~ zJB21ROEhZw9oM?cu%B)3?;r2$^K6f8->rMCYhBkl9OrQ!r$9a5X#eJj%N$-_UXGAQ zkVez#>B6+bwc@HpFJ2t94@NoL`dwy4h9AVsa~JU$<(#~aI4-KRbVnKA1U2RWoy|QD zzLrX=Wwn{uJ$*)FKhRzIba1oi>Z}{HKRUe3R<>}}Y}xl4 zdEaUL=T#O>c^;9#R+-C`8?h*-y1M#wgR>y>nMauzu-jd(r7j&XbE3OKgredw&0XPP zg%ow*9vZ5icz*iNaaCJf-hq!%ra_LhM@wr`_jJhKtF<|n7Iq_$#&CpRe&z1b`3yNt zf*z|3Cwc22@qZgSI&=VTMjNV6Cbvl&Q4`D+KH2JeId3?uA&%`;RyJSMd zs8v%b5*03g{``3vG=roMyy8Jui3+r$($NwhkF3V<&6_v=kr2C-8XSU6P?gLvc!W25 
z{p5p@qN&iExU`LLaO#;dLD?MJ;YD){MuttT4xca!uC{NBoFviBmF;Zgfff5@l{m#r zOSH-2Eff4pDsVH6>(*w@Sw?~eyc_&s_~Y?{vY|$5S(}p`;@rDObqB)jimc;@Z#eh9 zk;S+>Tg7z4iafGIbDxA1k065q(4k{rYCA{^(F%qDp2xUM`Ni zb*mgLv=tCAX;hrJcM3@m8Vr#%jTzxVIC@*;0q8%M)5(d?729ZSZT)BktX&*2JZBa- zK7GbRe_wqf!tL6*D`O)gmHf+N)cHtyjXxs{m#^zrmml8gUk(AM(M40$Gh`>Q8PE8M zj_JlEB_)OX@T;O-#6J=Z2g-##pWo@hL4UP7M7@eSh1ndvj!+d^sCtIi{+e(cq|arc z&9P7w0+vG)3W-FBeziW=oxgv$DcdL z;)`q+l)SSS)_j^*cy}`OsH%4j$e`bnSj`9kxnU$1gY8}_CRTyLDXCBzfg>PkI3laf zk8CI_XkTMZ@PZ5LznhbYJ3c9?5<7ej;%w8R*&jcCygwLj?DJc6{BZ4D1KUe*Cu_2i z&t*c0K?>br1`vy-oEcQXrFpLU``@fZ;4=LHmD(~y3NICg+=f!_aL3^u2wPaz=X>F{ zv~_iZh@sdjk-amw|FMKE#Gm=TJb)g>ZsI4v%#4J91wN>DAml{!=bXVqH%@+iHr<6o z>DuscJ7TM`BQ(r9iC%P1vEP^ z5SNJ*qou>Mj`ioRBEZ{&z>tvl&23QF9&U0Si5dAOKU!AeL#?xd4T*ZH@_*p>1XFbz zJ$L^65)tYHGbbUu5*Um1`slGEDt9I34=4HibHIaX<1n_q4)-9ZVGbu}X2^mDM#Ot0@J*p&KJWcHnwsI5 zW-C3Ay>a>)C4aK@`3}&d2*LVWeqXSX%>;KLy2gnHS!r(?(Z(LmDxRSLVPv^J_kskW0P(tl;s`j3Qr6CY6sry{v z<635rtJ+%E;zz9ngMv^FCx<|UCVWx8#oWjZAHU8879R3kaN;!-iQge}*$t*QEtiT_JZCF!AkKU)ScYk|K zu&dz~b=Oktxo(8w+KB(FH#}$d4+sfK0#IXsWT=w-x7QnIwBUVqSv*lu9?#fn5zm zX3|9q;pgXk%9rF+GJZ}Bjzt4+JDrhCzu0Z8OR4F5P|plZI_Gb`(SoV`I5y#}5dM~*V-=IBCHlHqe)VTtY?Dt$)Z^+>`fmXZZBFjmz7JY!9<|D4$oNgH0=KF7yR@a zeIb*rn2(NyatPchUwnCEtqzEL-$8H}SDi0&B#b~>P-e=1Jo{2uRiO2SSr3zet=sIM ziR1g@cj`*~Icvm(%PTA6qobqi+n~kOhD}znBqsP~YHI4~LWq;aL%RPJF68!)7s`9; zw}HBlTmPr-E7LGXxEv>?^h*y&@A1UOp7`{a?*#}MT|Z`Xf0>x>#aRzO?e=t^w*NH` zmnZeRkA%)i>AarV(_3pz-J zQ60#`zlLIjS& z^Q8?^K#|^;JUH=<#1Pur+R`Vs@y~pX);;GCE@~mFu9hh(ZrI=sk+|5oh7KDRE?h`n z$#(hSZso$0h`mT{#1=CN)&?zX;9czYGG!(U_fzBjPx&8UegSTKq2xR{N$+6-odC0` zL~a&yMCRn{eM^L?>(_;pDI%{qaKH-T_Dh#hd}e(e=XAFvoF*?cmEET8Uw&-=Ohc@r zkjEDeE+72+inqW$2=Fjne(1xQgdDdLh?ND=;vF^`>bL z4?Exy2iL7E5am=9+RQa!>`qv?e0cZI-2Ylk^4jjA=g;kclf(FvU;Nu;#Dp>UckAl6 zM?A2l7pYFk9u@LpytY?BN;ZJKZjNM?EFMU}BgLIfk%P8Pta3L(JXF6!Xy{6oXDHXUNaCzP*3~O-YMsoKl5eF+{`3!^sK|xr~L5!pHTgJyRvoSy9~3an8ur?056g4uQx=6 zX^Pycnar`#(N8W~&?6BuEZCnCmduAqT*o`t 
zj5uKh{yIlGto6_jPuh`Qo0O20xrJ0uNDzHhYs17apSlIC;eJ(t9USzNnsvuy5|>od zajtN!i-vFuE73f3OpYFH&zyi)5IXc>F)4tNCmtSZlBexCa3c!COP0)ITXo_j2Ff{y z2sBJJWki9(iyVFW1!0ZJsl6{E6=9Rf~*uEZIhU4?g_2{?fVPxug(-(8pxmfJX}E(VvriN3v)Q4{LaG9(gAf z+~mspm#+zSY>i_u&{?8 zXS*>i^9)0klOt2Is+>0k6>`^!)NJVtyYc9VGrGW2sO#!Xxc%&fiXbs>*rcga(0B)vbK{E|0?AS`3RC4*F6ix@YF z%mDBZP=-`KNy0>Juyx6c*_j!;KV3g8du<^HS6|Vp!l}mNcauERk}~2jK`R%Mz$)x*7xir)vG0E+v;SzzF%>7cL!kCA29fTHYk#O2~GfQt~ zBIjW97)PK1=03kDP}Cr2o&P+n;L(ZmRwXm<8~;MO2N{b8R^yPMK66##qA>6{n_!01 z)0X95Bah4$ME$OOdYnCy9@aLM!KHyQnR+wi>*!yK-+kaY&jN!uBGcY-ZLNJf@np6b zJVGj3D2r?C+I5AAZC)7K$1P!K*nMf3Uod&1yV>ZTOxw7aZ}doTS3lx0fKoUS1utY& z#Df`D6$-Ku(?c1cO9CEW=5zT>$2Z9%9L+=wu#VRTC63Jc)7-+Un-`pjBrg(Cw;JEc zFYceS`wZ1W*BjttNwW6s^GY&=lzq&?Aj$DDv}0=w56ASHTDoh2wSUua`v(#l64^_)Rq?f;MMB znplIBoXI4OSi{BA(%Q(uA`s#x0t>xU-vtq~RwQ;bbiYO;*|)+|u=}pHYb}rcL~3Tu;MovMLmlID#x!$VUr($(H;JZxmGCoo&SksdChP%AsK+ALA?YM9^nV5i_|zY{;$qmpfAT2T!`_ zD$)W#(W3ii%v-MDzf)T~3WbUUFbZ}VQai*=sm@AZgFmSRSfY8F*mbCJ-omZ4p-YyA z+;&E={+4s-BbFh2j7+3U71?VSpEtg+>Dp5bWjESWV_SqYicETE$T!31UONfhvW!k%T zSw8b$h|=CJqNNLxAuNn^$YLsH3ffxNH#qr4Fq0;DM2)l@(I=n=Uzb74vfd$Jrty z%?e@SU}Oz1LD}NAUtT4S89kQpe;T$Ck=pDzb5O7l;h8Qvlew9!GZJ!_*G-r?tVzmZ zO*TNNFOc^DC!AvKLhMuRXq(7W3W^wJVCmYJy9R^&K48Jz5T98x@& zGIg}QH~mnf@_OBjSW)MxaXY^UJ@}xTmsy37L={$r10%AzF5MA)EtzdeR$r^oAWfJG zi*MNODA18Ge+H>_JH)V}JkzZpusI)7$GU9zFwIWcs1Gm0^?O-pdHP|acaSm7{Pt?C zFY8nw^oPE7eHUE}|1+=nd6dIni}z0Y$s4#-H@1d*hlfjp5`cc~nk&}cPgax|di6dIvNLl5XqJXDBGV|tLmM^?ovUvfQXTib-13}6te@pN=QXI=_ z+ZYrWa^d_HQWMO{{RE9ZQjtnD%+^ISY$?Vde0WopXRJXWRhYDUNDNr03cQ==tT8NR z@1*m~`8|+aldxqVO_W>Ji!JEeU7uu&j-~2_n+{gzIA;++OKi^_C6ekGjegg`=~Oq0 z0Bq+U0DZQW4C?(AOay5FHNHGKPDOzf(VB(y2y`sSBPsiW>~*}{YHNY&Mz|b8Y61|H ztLIxne7r4$?%M#kS0ZalY$=OL|DN;Lpxy(A6g*Ef#uW8*)X>UOsG!n&GL|3guZ+iZ zQ=EY)0j>dAD2bcrntG;anK{X>59);stozlOr5*N{b&!pml~lw$l+$)GQ1so<=M@&E!r7R!=pTuA{T)=S)fV)@vBZUi!L;e> z#*^0$4wUbO0bpnZCq&k$fw49kW@gdEC=pm)T3T8b*bIT?s)yaF8#y8jf;@7(>N}?A 
zf&HN3-RUT+vKKy^giSvegsgrp*inxTm8d10J|UY_DkVJkW<5t+?pYEkyDbQr~Y%KSg~%|1L%Ybp`(s_<`nY z`z~n0vlBW5$$_WUI-wvZh3X4onN2_7y+0qOUS6F+=)_n=oYPUXoq?pVKdOGY*nT^A z?u`YWJ4a4w*otEpD_u*J`_?e}!pcobMnjp+LqU98o;Q3XFbeXl(;=blUG=>Uy zJl4Pwg;^Z3iuR+MdF=wod?3C9r}uB<%A;}=5=|mDB4IL4n|>gBLsy>xac7CT3stdN z$0O+6B!!bl0I#98iOQDaYkKi9E;6zpW{i~1E^8SWyxWjV zFP5f=RecrcMfDXf-au?Yy4lP5A3uEPUoIrf{do+>L70ya%~+s(96+ICq~7X?VTZr> zlmULZdG6dfvx`f_#VI@&ut|09S%D>gy9FcIqboOERQ-=f~J*jLD-suqQY#zn<5$}1`?a9DNkn)xC^!lM{sc~W4`;(WP8lA-IU{oVhH$*M zvwMT&FVi!Im&XiMVreLo4}VxJe^hRou*i0X>-z*|kHdI*PcLn7ILoGdkxg(1#e#Hf zS=iH3wwXjrBVW$QXN1rxE~;Y9juYLif5g^5!~3gGjk}-n-_%5jY!+swl5&HAI+fGA zMb4e2qe9Res!9d(iITnL*J}W+!7m^P&)PpCPWL?@nP3XRvoU~1#SwcG69r3%?EofX zRDYyDtZ;{lLga4GvtW@p__8S@N)+9nY=@9hiRqn?$Fq;1rj}ujX#&hBO^fnM{4KyL zhe%LNFfkO@S}=2}Jp9apc!>U8)lxiV%)aOt5F-jeV@7xdV*CcCcY+>gP1~zdUb-Ac zI`kcWrU$j^IM7q%2j3K*45p)Y=I4;c0l_cFaTeR1>?SyI-6KVFTZEEhcLnfh5|^j2 zWb;g}ZRBCT*JafJr9WvtG zzx@8C4Jd(8*^TkuAW73LcWbwT`+;E+Bsdfm6@6Z~hYss}Le7lo!WwW^H=ZxycmF>i zPsq8BjN>uxVbAC0VitL-ao$A2-V={;XWYOgV4uu<`7J^6W^!EhK7N)!o93@>OXdSH z|6)6KT9rNk1jp=>On`CGR!R4j32%-O5#46&>A*l3fYP!#1z?Sy@>*2hC|UQjeNsy8M9%Q2 z3$tCne+wPG6*|%#10_M~H3=6|=D!u8p%yA~D++lc;3fhL*u3Ze7erN(dgqSh4ck&x z!VN^xO1K5Xw92u2pboq49R?$k8$uo&i@l5K5?cTPX9Cx;1FX|bDt?3Xf{)(^7%W1S zSt=)IfY}NqpnkO;TdKkk!2TBb0SUE!!aI2v*{&qj3fq0LRap!d)bC8=gBcBj4~mMo zAiD+5I*WXxD~OVW8%YT1gfqEAQ}YsG9jWe+;n{^h14fG>HDx$GyI*gETnFqg4P}KF zChSjshvo=m)&DDzX;_&{pzCFboE`~W$Qi}pk*)r(}!s{+^w`zkgz zRt5I=7W(a*EUT%dH@=PC*;$!*9eP3r+b75{oL~NFXx#*#4;$#1UVobC+6icI;T~Eh zbWk@tuk7fsGSlTpz?ZS+i=_kDTMaM{-9M5KHg3wg#rO_TOi2nXVMBL@FA=L`U|?Q1 zU|lP)(127)5saja`nG=D%WX}^JT^UkV@vXo&Zrptw(NmpQa#H0nC!C#+kz?%)EsTE%J!pjVgy3q`&CYCS;-|&X(Ofpz9(ca{ zCr`K;ykSRw*m&&o3;K1r23sxx^TAd%bRJEh^P}Ek*%|crjydizG|x+8yyCR{zS>`s zkH?>06s=-lYyXpz2Mp>b0}IDtvJ{J#;xwb{t>8TSS_nKFk7=-h#>^@) zu5fSEqA+y5q$w)iHq4&Q#za_iyhjVzN$S@n$2-a04Ho03O9zNY%_#N9n7s^&BCZLq zVwcG5Qjjl_TnE!#%Rp5LxN+m%2OA7$kH^Ry{jFD)X@aXks&)X~32c@9GgVL}E5p34FajZ?C4fdw79=iL 
zIle~|jr7q5rF#}-${Wv22g@i+3S-NMVbaQhN`;v`Iz1rin!11#lb9z_r47_2ewLId zqs!26{kE4UX;26=stE`RiYE~nrtVbY>)(9ohheZop`(qsUGjBs-={mXyPj|pN(_Lu z+n__og1+Y;i9a-hAyENljcsdO49M!m>7kQK>d68q{)ST0gA4j*Kgs%iU@v%1v7dpn z${XFu1Bl&SFiTx(sgTMJ_`hxpB2@J^L;wTg(CU+i(PQKviRnTKgwO<6TB^4CT_h%1 zSbqKRZ2gM`iFxA%x_MlZ%(qdR^BaqV#<5l-)~O^KQ&l}1Dq%q;J@`k03Qqb${8Me3 zwr8UWz&{c$qT8*hTz(bC_J*3BH7Cfe9qDzUQz_uq&bn2<*ah4dUmJyL=S0qBR8 zdr8vbnXHSp2aBkZf)Hx_;;JOp6qeotN++0VU>$*4wr$RwIYrN&S#;R}cB6f0L>7aY zQ`zuw^Ivv5R~+4Zx^0P+sK&@}$Eu>O&!sw&&Qu62HrgCqq9@eJm-x2bzI1(I>aNJ! zn)AIoa%-pGe&u0Xy)$yHE35*peC|EC{w#mx-FfG^ zZq04mGc;^8x_Z&d-4~aw;(Wd**#5vlvt+|t2Q=5FIjst~O2~EG=RYpnQaZ`ib=fD$ z-4a}P%!NF98U)TGX~1DkiebM8zq*s)R0O@hfilMy)p{vHS!Hk51WoGx<%3X?|y*Xunv~RwP z6)pT*$mP2oYaL_{4SeEI8+deBd)L<+_h4{U7VOB>ESH6Gfks#-!w zve@W)pciHWHo#_=3Jg4<^PYZKd;4?l)e-OwkYR-5XSu$&jV=0wYKEG&E+TZoJw=DZ zI$l@TUs0{y&dw6EueKjU)zr(TtXDs+_iLwwp40P{jpb>2i@oGd-Y_>3svLE>J>oh; zl=kFf+~IzUM^Q%)JIj2hiHJPyu|3o!e&ud|jm^H59zA>I&M)2Gb5KNLeBqbHRdHPA zx^3+hKU{kl=w5Gp4r38#x;XTABU3(Vt)iP?)6D#H^vm ziAueY{SfLl@tv6<`VrD2O^a^v)xKoY`8r6*yqnkHA>pSG5o5pE0Zk+1F_g+nkg6dY z{6W}Ut88i-CCv3MX+rPO+wGP0hHw5<8Ev1-!Lb3oShg4w*$mA1HKKtYRJvLNbqq@g zjTC{?Fd&g#aEEpTr#i(d{+d7{0KsTw^u2OSLq(=N`H^kWl9>MCJH7|e?L~|s47xO? z*vg^aJB6q0?g4ex(oDOyAf5=c?7&}WgTf#pT);A< zA0tW$^E=*WqrIW$3Ryvto&x5?IH@xMXnR1;rcU?-Ujc4K?Jb2qvPJO?MuBsM=A7MCDIAN4~Oqo$h zDj(pPc&sObcKjm|jTqIQ{HFG(5d+9xlWSmbC}3N=T}{($5EP?bjAXt*wv=}W{4X5& zpyIyGfp@Mc*X{(P;i4YfvG2~Qy^|LRB(ep=6t4)Q=+%r;;i0b?a7ZdJ0{)Rm zK(x}|*BYE_kG{1s0CDJKFd`9ZXjB1NCPS(RWB1aPF~iFmBr;`{lMsY(;8eq z9r4WXnCm}5jMTF+6s&@$Lqn!Zj=m7RG3H$BeN4_RM*&Li#*Dplajh}nmRtl;85Q>K zQLajL^oVKv?mJ#`%poPm6i>u!q@X@l01ssi@4DcnQB_CE{w6x=g}t-Jda%7`%;s>* zMnp&o_;u);WqkMxNEX0;&TQSMmVTEXp_qr_ME)Y?OUD0OaQ<5$tB5AFi$J;lxiEY! 
z7?rRH6mK%d`U|{CE0^>itF)JC1ONn{PE&I^3>bjyjgukl)Y^G7;z!X0@P`8LWj*9f zGCYUAI$>Q2gEMy)J-A>GhkOLm(|@SoqK->{EnU8f6aAAL`O+ZNPb zjzmiTq;ib0FaZ~L_#-SNY%$h3bGg=Obhz*Oa0qCx)Sc6Cac+ygt&yKtGPD5Qbk zMFX}NYw9QY0bG=xv<~&ud%3WLsLZ5*P3X4)1X_Y*jTskGW~fa1YO61mn31s?y1^Gw z^6Ume*G$+Jq)8NM3*oPmQZI6U5)8@>w%-j}dpwEsP9oOd3R9?=`xztQY@aQ>+6+g! z6R8*`n?~k&Od4z{Osb8SeOVU zx!-U5GpKgxDH5mS$zyCE!k6i3{@7AwUQNn~t*w@F{dFGmCHJeGFAPPn>IfbRZ~$>R zRMa$c4_R8qA9~QGObT_#1WO22wH5Ie?T+blThKhsF@$=)oMUR216ewfBE$ESV2qR) zy5W5&);ocFAp`LD`#VHY0e?k(u$^o)mFKNBO^a!aumG490?5P;-4)#OO{cjJk@^)9 zx*M@Z$8q8aA>0;=vC8oQ>i7hf7FH#7IQTI=9;@Fq07MrqjH-7+B&&mol-=!jiXr=K(wLUiw;+~d6ihmg_en?o&`F) zc12?lkJ8WsY)4|`()QFZT&;1Y_OK$;T)Rhu>(_D)N_Ts4ysT z32=}=0W{@*s*VK{jy&p{o4bR-RRRs}4G$)+-Uh>BGpzIcm1aINiEU2PKvB#IIMR_?T>_k=s%#TFY!9P zZDoZw{P+BE|BdP$Tq+iEoUHZCswiN|b~L+!$_ZOX(4}d7>-7ozDwOk8aX66|O#ZS( zxkjT7m^z@hdJo-xYP{}YWY8I}o&8TK&r=lDX2NamA$`<1m6Z^qkI3BiaZOWjgz@rN zm_K#2zf5`X1Yq-Q7B5>I{u9!6^lLN0HZL3xW`N;RY>6Mgi;@~%!#UAzbls412x|RR z_%{33P1}N`RAK%r&|Gm{`Qq`G(oLk052IlREIJc`ZWzJ(7+9Ddh7e_tnF()&IUl7SaEyp|E*p>g{P0i z${2@Sj=VIMxZ#Y(3uWjI?13eX2O5PwX2pais_vq&>?WeJd;$yQQ{g#P+@N@PU~hY) ze+Ri_W1~l2*hok1h#Ce0Tm!7d*VA?-Fi?8P$$3Ti{egZE~_-LH0Q;|y(k{;pZfPB=3 ze`p}j+(TUkaNeSLn%L5%1f-8ChBh*d023hB3CFLYVwRZ~5oay=K{ERh8jex90OI>L z_mfsCRJ1CPtws@m5=S`e##wrtCbWSzi{W77I5hY3=1^se*v!Nphtb~6$n!LSJ@$Y* zmNDLkbrIpsd}Iz(s`Hcmj$$LYIM>#Jc-0fZH>?6bV<$&RlrZ=&!2Sb^_MZVS4upib6iBARzl;TyU4LP@`vh^L6{Hq z78z#NhzK(b{$7(J^ohSSTc}UFRbf|EP5GUC0+aanLJ$2R8zy-%8cT30Qao}}Ng!Dm z-6kK=R2B*9x)#$o2>C1O(gibErn-p;DV-XpTBC^tmJhsKj|54@bt5@j#|gnivrcZ@ z4x_#Sqd3NIMjhuVKXo;QA(BEq8A~DQ*Fi`9k0J*BLnnzv8)pjt=|e;0L;s&7qdWA~ z8NST=y@-G=2~tY<05OYsmXGX$a;@Xc0$d`s1Fxs9e4w}Hv30pzcJ}Ex!LeG^z1kuV z={+ziN;s}kenAl~dV{p$U>+4}ho3@*r7vE`QZ<>*KJ+3(I_UlP!^rL)? 
zA)i5|$^GutYdEMH$v-vRFc-w&u3Yx*R<5F5*PGjtE;<^Gzli#ax`!cU>~LR%rqm86 z4D7M&REf|67>8CS_RD3@F+p!7e%8XV%V|L|6%W>td;g zUfYLfGG*s$b?gj$7w@AU>#gch^onl%Q-2I|MoN<9-NlVBYSVTWy4s()6ZE*I@QV+UQ@iEJ?S+vo^|dJ=j3cX4}Bwqo?OT>5rH4N`2cR z(Tm4BONDW5Yy3Li!%pwSt#4OHNoy|@TX3C&xnqeqPvnEw8i!P#?XvVM+e;qMn0vIC z6;|fo667#$+O2L|eSY@+Ol3E+?bR}NJkZ|lolmi!+csZ-Zap?i_P5gNu)W|9avVw}-I zeoZz}8O_6o49Z)NjvRTK+vdH>;9gX;qMl`%)wj=bu7}=a8(gB6mxyg{nJrbgTL}Irik)(w~ms1OW=M|C|&@oytXiChR|G zoPHwPEoyWO%D&zUcidXO&vxr`+MW(3k=e|x8uy0orza%ono0*w3E)g?f1eZcI=MI$ z{~E7tP;RqQ+c0b6$9rp2cZeA|3&{B$dpKuqZ?2b;bBch(!mUg6oyjIs;DMq&ab{#x+*aKz&Wnkx zw73WMg)UKE9o41#|1!z4??2Mx6@SEc)vl`+`|S3dFJzjnVYnr_TKZ^|SFhSz#hYr& zeG?=gqUMFe_UW)*JXaK-h4#lq9c-PuwjbrTQ$Kv$Wb)$HfHY^J@6MAM{DExdtEJj8$@lJW-LcywuwrNYt>_)1sd0}RS5Ax@)OW5Q#9nfpe0nH==VR3V zl;nvFDvtN+Wz|Go)XUD*uKjXOUD{MP&Bo*X+e{U_G&|G6yv+DUcWz;IDqQ?UOT(@V=HTo_O81I)jbC13-N5ynWc)up&j!L&I)Yq(7GGQ zUF6f2tH+?wkT8wbvKmG|UGPDE*|-PjI&(oKW3m#%`z@bDlaciH?j2*1ATcDR_A$qf zqjZz&;tAOtBj_8*E4qgzeU?(vvE{inBTcO0IkuafX^OpPLg3|~xX&#EnUQr$H-g>P zx^3Hkx-^h$i%ws6uh;^;SxMR_US61VJN5OK1C4t(+GRZXK6JcN+se6zGjVmscSVf} z_pIvAu?{SGGcDcE`05gdhljBr&S&^&biPbHFO}UdXYkI#yYTKrKN0_7mSs@4(p2_l zB$SumZ&t_D(_zT@D%-@aC9|0=k9^jgAM6wWRlJ3&sDxZNFvn=e#PH7BIj>J4Z|Aks zo|4^=pOfw{a{HZ$^N1abm5rL$D%wt!>Z*n=7@ChUW(kSaA`;i)UIh*e25=smKZm>e zbhYq{S1TCYxO=j$=~5fSQoi$&qNuSq=$g_jdko9{~6t9W-xwA{Nuw zn7n7+@9K4}6XV{-q-un}9MnH*@_fDN>6g-Ts(F*{jjY^RsU3GGzIAYl{VLak=T2E( z)pfP=>cj$S7*}<2a#(EGerBSAWCAPALg(G*?9rW8I`0lSCtJ5{I4ohh7t88vFdRJH zs%+DS$a&)TpGz|_bIwRe81upO9yX4$v5n*@3c7i z=z6X!VG)8niw1nya% zFFXyJ`6%N1(x zd5~D*^K$ggsA{i}QWM+#|kCj~8?*WXn;;aZ&5 zZabE(^_g-rJm0-qEN=a^?_=v$o=da&fj%|r$SAx!EPd?qTG$Bp{Pu2GA_EcKD!}0s) zj$h?AbPiWr8HEj6pfT!Tzue=755u~Waxvx{qi*^BIZSwk_jdEgj^4d6$AUlF*+}q` zeZmzE)l(<#cMGvhRao~&Li7aw6A-Gpr}k`TO~AIEqGl1*rHLJjRF{f~$kg?>W(LVE z^jZW9ej+qsMLC~Vf^3`o#!VgDwaY)3$HzBtUR0ar84&7!w=a!-P~Sp8I_BtK0t@vz zcH9k|&%7s8o}shC)2dJJt-Q*Hu(Xe&Y&;Hj6SC6zRX8V{xNCoX&NiA`yJ0+Q=bm{< 
zth6P!k}V&Zc+J^$qqTS7eViS!1aTP&EWPaaK9s~sERtF(=?Mm-!J|H$!gA>V?$#WM9qp@?8~0ro(U!8=re)MApL@D#Q_srw z)oH3F`#P)CM%hXNRV-UAcg{sS7@L%AwEf|;kzJ#&0#uC6Hs2Ud@OXo~n+U@Hs;IDB z5-old--v-hf^pJ@kJ0;=tX{bFO#H{uRiEP%cI{F7I3dc*`F4iJ=z%?IQ4%^vu#67@=GWzOm~teTuNj(zITlOS&8Nv*53ww%(qJ*4RqdKG`fy_2NR<007REu&X{8gF z{IOY8px*z~EV*lH-L0KYeEyL#LkHDoqaNfRl#^dJ(kphUUGIT+q|(=6Z5ds0 z7bRW6fsu1OJ>TMU1q;{TuGn@$*q=i+LiB8^#)7k{N_SlEM}HLcD=<4Nyz*N82fJ)i zQ~qOll!0L`LX1}bE79VospaSzu9zE?0k;=*`i5OaWs}jIq>?AClGeAxu>;*E7HxfY zuosRvZjEKGdWyixfJ4qBVsEr{5^Ix&u9q~_pKcX%G=I0vInCK?fy{@cri^7LubJ$v z;Z2e=-(Y;ey2nLl>hkP-hIIZfI~@5xx`|Fyvhi{!o5~Z=|+QQqH=(4@@uwHd5ts^9)Gq3#}39|vj(q$|D zmle0yVEN3QSpjnC8h)}UkzA;bN91Py=^X2It2C?U?>Qb-25qU>^WJOjuHA_5VZX}H z8C=(0w_fO4rIJPcpq^^GllJKc0n3UHU)A7FDm%PuMD6gVjLQRvJ-TZGUCa76ZWeAA z-RZYd;B{y<=B+T6=>LSqqZRFMI;-%dU zeYSQn0-AEU(yfO#yzZ`;*qfEFVDMaLo_VH1@^z<6={q9R`In2$%*dELLGU!i;2}q&h-aqtDFu3j4bnWp%60El_ZIOOfQ!DErB9Umt>wW9f z4u^+VpPAf)gQBaGy{9dHwVso2PX#(8>z)0}HRjcEGA5S26=R-mpXI(gWAcWUS36X9 zY_QY=ng8T-=4gwda3K=94kz|!dwe$ke1#7G1oBSNQf4@!LB&%XGyl{uJCngJI*L(o zsfx8#>OAi0XYNHLwYrIlmDeineeRO(2g_FfNPT8@X%Gud>&vuseg?t0ER1$|&j?73 z5edc_io+F%p1&ugpBP-e%}%|Twh*Zx&Re5Pp(KOrk;$BJPS6hfXZ;Xn2DjC}gsZZt z4nv=_b&UFRQeXFTa_IFgS$Qxi?)B#*i8JIkwRe5zWK=USanqa0P{KO-6bU$(nifD9 zGFzuyMC8aiQHHLiZ*G(Dn1CGrftxV6Df!K3j@xLdXSe*p_x#>WgJ~Z<#RlJr@2xG= z(Fqi}mE1Fs{)v;(I%wJ4ej8Rmm@{kq2S`#tP~C#*oF@L(N4D$O`h zJ~@+Ww3y7m{{}I|N6%`J#pTLgV#rEkwk{|INT)j-^se-)-SK*L90Vw}3v&Iq=YN$Q zVPN3VN1hWho8!BgeOXwV^EwJA-zHqyvq1L?O8w1wQ~1ii)IX{~2M#JV5ovAZaawly zQx}V@U8@HX5Tec4jnDKi-=}m+@|4puPeriDx6(C@T*N&EN#o;>Vm^fR>xf%IJHWx-wSIq=mS%`oG9v!A!{1r(FE)hA>_M5!)V zRtC3Rl61DqWM~tEnyly^DZ3C)U8lcexYrK-JC3&<%%`;vPB zpOm>D5vL=nZ;%&DTV<5-2*o_8i3!DxHIx1(hvFa`%?d$&1{HA~d!Ab>zq-t;uql4g zR@jC##tI*(D`2`XgU=)gg<4?{i{11v(c#ZkhfbP< zwJbMyrZ#@#p|lG!L)*|ZKjGsiGu5?4>#n~Vxy?Rf;ER@^S&2PFO@-L+Gz)t4+}^;+ zy)|;#_dp24dJpuKZn)DdiNDp<&8)d=^?mjTfXxC`r42i2TNqP z^C^CLj$M9_tE+Xf&1hvpp3) z!NKX3Mw8CIya+MdC!1VHFM)^nQkyB;Wm*9zr8(Pm 
z4%OSaTt2kg}RyZPY&A)T7gY_T~y;Nr1lTnxRT6*guZ5w6Z**sF`7T0q^nHj9? zsH4{%d{3RyS2tSxe&G(z;UeQYmTu-WiGP2UQ%r<`tD-BXMbvRsH9^%W`Z zqQX)U2Z}@dBT-$K9vFs39XZPATaECW88ftg0#7kPTlB^93xrV3`K5`cz6fL!DUgTN z3wc)TRl0n!>C=;uidEEQy_xW$K|k0Gz7|yXNb=CL|&f1APbWQNSh40-H2}Xe<{Wed|%1$__x&d2X|BHCKnp@u1FXq zfj>ngOs?u5JBvP&x-nU{-IrlVA;#(4U9x$~x}45^Xe0E^p%@YW!XEw}TtDqkwKBmH zOpW37%^CS9;TlKg77&q-u6f|0TYc?9vl%v*AsBkr6+G6W5)#MdCKs^(KHY<&85KMq zi~?~hTtlR_X|c9|31sMq+UeoqjRz?%1nB|#g=jGQIoE?vkMD)NdTjORg0P1Q+y7Cq zBq@fjq0!_Y@8|Y`IHZ3RvdNDkVnii0()y0}&c#r%&>{Liq|K;NG5=R3>BQ@|)c=b~ zq`KkarwIi-f1)w)@2x=3R}6RzWWs_jq|Vz~sEu5=27#gq;(oLb(YY0*>jYyBo`&h| zpP8}@(mctJre7Aeew$I!b&&dzFN83`13oJ05c;1+75;_|d-nV#yaX-5UG&CW>hpi( zuPUwE{kxRM829~)K71GhhDl52OG_m6*oqr++Y6)RT4>kGYX`?}kC5t#fLfz$ORTb0){wu`Xbnpb|MAC=eW}LpAkdUxYm1xU6+DDYZNhhf)M$N<#WfI8TteW3M z2p$NcVn9s=5lsPVKf2H{C2FY^<>gd68mwjWBXz3M>oyA74AFFfycU1u%6Wr2q82sr zO_w)RJ#?Kit{UfZg?&|`0d!To<#4dR<48?+I9Xc=nv*V16QN$A`x9!t!&|>K73)B2 zh>Z8)8*biAk#5m&0{^MrM5z`l+@co||3i}{<-piU>FT2M^kwV1U?VLscjmnsMPYK2 z8Q1RpuT}?Y11Sii^#4m`?VAld(=(4T%HiK5MDi1_C4nQ(`knna7T~!AvY$KsuA$|c zIOeTUzr3x%(ir}or9mmhDMIdTOky^vcAU?E(Gv{q9{-2Xi(I=4lyEX!ilPn^%l^-7 zL{dG(@WmLSqlf^qDY4!w_&CQKQDOH&`> zA0ch0`~K9TI`*OUQ5hi&bu44o%Fy*3FuornQO^qRsW0Ja_^m-#-RL>Y*}x^Cz?aNiFXGhvonzeX|J}14Y0e@~E4~bVezb z0t3~EP=EAKVrJCJF_4aiKXbz97pfxzss082psqybtq7zr&w@cG3Z+n70z;sk--BWZ z(7)?AK7tX4^HDQKOBrdKBytW4Oaf=wWR=E21iYF|8jfizEIs=^iB%; zbw1Mz;IHXIqNEFuvd2FSo_Zl=w-_kO%73KBAr~l*#Ng-s)4aG5aWT^LIz}iSAbmCV zfiZM(DF$IdtOsTbz6Apbq0*SF)R=nEkNzq8M&}@tYe265cNRGPc7I&=4>|!KS{#xf z!id2Z)H*0UN3B)O_+J|PWAeO*h2%Xps)Avh$@ z7$Z@Q8I$p(ZhR=n;Ms8!6=vxC;38ATf88p$oN&NZyL++ca6vK)*hT*sXtPwsOwOt_s=ZRpWlVatYbae zzr<%05p>MdlAFf3)qmCzrr-2KSKLtYA9Y)(FJn0LuV^Cmna_lpX!0kCUwSy7L0V$| zs8vV5=eP8kN8xsKD}a;zqcLR&^pYobAs6TknmM&wk@07r9o~M^O`qlVMF-Uj z>9_mix`+;mx%&B84@$HCSy`9e9D3Aa_*LF_zLR)u9 z`noC{B;Fb`FMqk}51`~1$qo7n@8tuhgm0{YV2nOA&MEo#uE4*!>rH6lc(O0#Lg(D^ z{x^A;rJFu8r;ax!{*?0quGQ5)C_dh6D1lAaJ)Q&7DVoH5z#pR^H*Si$&ipH7pT0G_ 
z&f-Iu+xVvfJ@u(Blpp@33`~8dfHda)nfy+DMuBqlf8wP7;(IOrmXIp{kQ1e@L$?$^ zvMah8>`xZr&+q!T0ucEWb<>#eb`9i0I<0doS%GE}oFjE1dh`tc%mwF57lDO;Zlm-S zpi(_{>`2I4@CUKYv5$~~)E`BZl&mnmWHI4glMzBh;uDPm?h5>^U4YkYYpm0WL?3}3 zFradLhU_9rxkth0-&u>^l|(Pv>{W2fv)w2l&Ih`gGu*4ki+&F>G>lWi@D?U{wL;Ug z?9^md^mBJUv!d>J=GKh=pnRjLZ*MWw@@%1s)ka#^0VrcM%7UKV|A%fb2N@e`efJ`u zrJCq|_M>MgJs1bF1KB@w*}2E;6YM3b?eo{#-y#zDP=#63dX!B0gK#7AEj2Q9cE^sK z+}$7hyBaH?5Jp<*po-eNmoBkWFqKid*}Wl4DMS%5N#Fz3v#_x zi*jAjhSmyrzZ`Aq*(dT0vR+(NQ@=XVy*ZSRMKvzYmoFhP(7yA~36~z%=qS~^;oj#v zwN>kr-|(EuliSyxkhDEMX_Zq>YSRH`ule%346+aK%yNGiGAhf>X8;9vVq3pPqX&3SKHs5qrmU`U1@@8D|RmYRc z?U&5)+Rx$NPv)-q!qt`i_m_)#wTgptMFk&&sT^>*q6^?Ql_M*x0$q-7&1f0!}~N6<&i=DG|=78RMDpP}|_ znTX7nV@Z4VOflS%y2iR@%jC(vd{(sWlGip4uUlD@E`P+Q{h-C+OA(vatSNdkI#83& z{W9D{yrY^|=1efpw-tBp24;_b)7l|;eWT%fcblg#eGmE!he~YfKKb#}0+}-p9z6Z@ zI%hbfsa&>97eC=RvaZpfYZcKHWG@p>r#yx}buC%1eQD^0E zA;-G8N|v7ySuZp{+o<2C-^GV<*hj;{V!usQ=gOO5VQ$6V&U{->6?-cS4*8h3=5EHK ze{-DLj~_b3=o*^UYxprd%!OyYP|nr5P5@aeJ9+iAC^@c zda6b)%iQfLaNc*}Ud~}%=3(Oj1@*%2AXQESanI>7u{0H6_UBL(p|Y7US636?n0-zi z#(R9c(8{rYMaamg*{$c0QirgQpq#HH9298-BT7i54UFf@Fp7wlRmmn~y0;as&2@)uRDJLiM)mlMy2>-$vlo#gqx z3zf*bK(5lVpNIOTRG&Cjfz#4 ziRFY9`VK4f;($`FyVI+W%1(z+1^}OS^WSFz>@^XXeYWry;Z<*kQkzaWbBuGNedufB zlUTN_`_ts>_xoZG7tipTp3@fRGUSo{YGmptpNcTLvoP5c$ zgC|J#1Xo<;i1TU1&;M&xw@IN%dEn$Fm{Of(4`Lql`Pl-fnnV8|>bQwfRsj*Ir*?Y;KO0^Yd#*(V|!e zF(|NmIM3xBGkvVrPxJqbKf^r#=K0&PtzdsG(K*3ka-v3j?ajO5jDMV=JhEw8c(Qk^ z_pMn{#V}5qQj?e_tO8#I*K{QoM|px=ni7d^`!C%HjPK^|d!r0?dB`tE;>ei2FU(91 zK}e?_D%|3D>#ZP%izhyknV8DvC+b5_!m$uBw9nnP8w325ybce$H*Z& z$BUE1Q`*9)yQb$0$gf0fb?T=1jp<7#vk-5W)f0ny#Lkk~4w{KllHr>tuDrWFOhbg9PIwyd;j}-BD||=Ulznv`~zr^$X>pLRKVW-jUmJp z@#`2Gm{?aSaE`lN?~!+7$HBWq!%XE|ZBK7O!&axoZ?89;-x_SHTiyHGrquZ1i&s&d zss}!P8p$ofwA#UIH*T0LJe5)y5W^l~sv&+qGf&@YedMJh(bt!aZ~hh$s!+LgN)Kzh zQs5UfZF3STqFfy}ObM4Msnj=)7FhfxT$X9n=&&U4%ZYiBHoLNIG)X2~Z~A<7hpef-uXAZ*eGKz0sGP1d zMXUT|%d_h06PL{|zGh=)#LtiXsunNn$=!7?QJ40K-NT_>6|KM}>vDV@(wvgJgbd7V 
z^7>vGNDQ5*uzX}!)cI6q&TVOkvh9T^o6weFu{pQpUfxR6J^XpE(bJQQA|J(<62q<^ z0}&5!o;ejQ;8!np*FrH5M-~08Zmeuqx?QFq8fj$4Qs_!fuUhvEJ{GNQ=l?RF>$7OxdXPUCJC!$!(fBl|z z2K*fLu2Y+#U6+BLC2^zBYW=2IIEaxJoM0aI{_U5a_+U9Z+ss%+iRA0* z@t(PGsnC%;GM3D(Ic)su#q?&Ldb!jUV}qsHP#B#2K6?KZv`lV8k_F!9y$K#qYl$@n z=9^yqwEUZXK{pPT4E|`mH;|T=nakDsj8$Fg`D|lm=AbmooaBY9DN2rf^Ms@MqQZ)9 zxUwj{6_);xYpoz_xSD@`i49M7yf9AyUx39to?bIrcCaJQX@)MJqxt80)%E6x&8hxu z6`8wZ;MS=3tYWkH8sZSD_82;+DZ}$m#P@?w%*=I%s|xZbkd?XJ3B97WMr&K-@KrwrzgoQ!@Q^Y zo*EvdW#4Ovxf|M+V-Vs-;O=mdD_1vNl$k$&2Br~9_;*@ z&b7AMz3*LLTSWx&x>js=a9HZ_fc4zDbJ|u`qG!&W8AH$-+d3~r&AqjRp%D70OI1_? z{rwr>uJ>eM5j{ODjCHuLQ79xNq^7=p4MwSQTv}FphOXh#K6Gg1GF4T3I0*L}rzNt_ znq@Jz%H7AOut4w9As7&I!f;U=x+_wGk@ikb(fAPQs}!zC`uViz23Tzm0`f>}X-Q&@S3u;{m`)E` zWRYoy$v0Luw)FiU0dcbtyTWKPtO3TMCj6=zhetk^VjzTp8W|Z8);zQf3JJ$goeD}y zlKR+uPl%0;ZKf<(r2Nmd~l(>{E0odO%kA<>>#OgB`vKOA1>h7v002v zOhdR)ZDa^&XlPCEx!QziR zmu!3g2p(PtO(uQVONVC=svaHSL0H+@%b+Edl>YU;WH=P$YhJy|imVA2Io;G`4dGix zeB^Qr{wN(9-W~V-)TvX3RcB6}vV*RDPebk^PEJnT_2Inod5$#*`AeZuG4}Oq>D#w9 z4c}W!naDcL-~XNu?mF4Ko=M!zd2z)k@Yu0U(a#7##Nd`VqLy@7c{!59qCK|u_R^8g z;XTn}nuqA!MrF`YO2$^hW9VpkCR_FF*{1H#_&aG;RRM4;zv(cV$zx05k_W$hsUTv> z;@tHf7zH|ecP;JmLm2t;3J3(lD`tOx)0~pWS6jnpdlzMc$>Z6xXAdhkxA`V*dRAY{ zeaWX!pF+`>iz;bu7Kh@?>wLGl2pwtejf|FT~8uJQ%`WYn8pbRwgw@%WF{T zX~K#?4CKg7RAfQ;Q9dFnZ4VneyRMCmL|Iu`X9O=*Qd^tzJO}EiRq!$zf}Xwutb$xr zHl*m=LtE#ZhWAbDdvS4d@ZC#YTeWFx)+FD)JsaPe^~JmYZBZolVKRhJUf(~ewSWKq z9plkaQFc3a(0jjzhuIu;xC?EDo|iX7om+};TnS4{-qT#5iuF<_KBIm!Xhv36))J69 zZeKo_A->U~X7ZOj#%mobh0 z?E|-8S`Km3j*X4=#}(x#|Hx3U9|=Pg?{d;a|SJ}<6@d!HOo?C$Qa zsjH(MlhEXuGiOVv$;pDc`L?E}ri1YFu){-xLDjtB;o(6PX(=Fexh%9j*9qJo=*K!B zG2q4xCvn<_5A=ASI1O)QcxMaY$+TfDjln7`5aS;XBOl*5<}=pwvNa{0?YXgT{dzq} zi4H;o(;l)VkVy)Pj^^6ug~-Sxl$eFq1bjfooJ__UWUUuK44+5@nwwJ>$R z>joN0c-d|qxXlIceU+Jvwc_gGp{EPNn4f=3k~W@R8`K{;DQfk5@=2)b_|%Sk&$>?QgzEK7z2Yu-(Js z(q2&eLQeef=~GymL}P*HA~zTCttBhbI z3{gGQ)zw>}gHOYLL~d&9>RZ~=)1rTg`*L@yI)@PGXiqtK)&2wpq z!M0jw!SH!x1ad*EL`Eis{KMWx2r1gIVxo&bDmpqC8lXBn%x$RF7^%{iFU^0iscu8#?W 
zh#wpq+J2YJ0WG3<$csx$O0=C5;^X6uEiEnATIkchZ1eKUCjq$)hJea_qXR8Ma44Kd z@jzA7^8Hw6=MEh!D=S3*Lyu!keT^G^Fx|!f^rDvHgNh0^e}De~fDW>SEbLF7#}@MO z^$o^0Sg$pUot+P^; zlvL6ZJ&P5BvOGoel9Q7esWxDSQ+YX3{%Ov1v*UwsRAHl-jf{*K{D@w28&m-ic@wjp z8kZ-pi4h*+N1bctshOG>eCa2ZLX0 zTbs0+ZN)P>uBWu3Voko+PztsL%t0P221G|kQ`%!Ndg}dvt;4Y1(2$Q}#dJ^*j$!@iVeohSu*05 zuy?ghp`;A}Fw-f_{Za%lt^V*Xf04m;P%a#R!wZiwwzu0g(V=+(dddOv2-rT`v zy=_}tN5`dp*RUP|L?6ngf5l4oPJ^J}V5kgn??(E;0^#N3W27uGhq$s_*3s3**9i%O z6PkMd;o)r8uV3fk;|oB_Ooo2*xCoOUrvG)ZIf!UsoxpOFOYVIQs$Pc=>+vuL1qD5H z(&_B$<2riu=>GT5SqSXR&h|TdRtO997@qcAB>qQ_9w~2^w`tVZ*H`!cB#Xt(qN%CL zDZX;{+88l^16kc2J7mey+x2Q4jcaqosBAbb0D?qu#h_T)CP!OBb8}TuY>14QUcSs{ z*7dkoOib)}Y^;fpy@_V(LR;szh2ao&9PV!x#XBrjRxUfQy4Ur`cVFQ-bBJl~2hO-U z1Kf4m>8{A4qN1`J4h%_KREqmv(#=LEyAWx05eSiGPZn@Te@L$W)P?q(`>4mEk&odB zC{obfU!q5sJF-jm5J&gZ3z2xMeUO81PUflIyESj0+~g>dYUlW(h%hX)P(+TGW|lNA z8_%p85qUvD%v1^PyKCn|UWKi&_th+F5YVJ}-59NonOuq*#5`st#ZzWl5aX2+7G&BCrU@Su#|1 zm|i-67v9q9`F*2y=3ADS;u48v)5h zDY^*XyL%TUL0@AilEX{SFq+ZJ46WM&&9@zEQ&^CdVo#n71c&na&6|4r{WLC%ioF?# z7fY3tVtcc^ir{WVQ85yl?3B9i`0XaqIEcVge$WjpFDao`uU>8K7M8{iT>|0NKI%B2 z6DYU&iX8^q4WA8Rvr?3VhL_X}=bbyTN6QRbdU8-G85%|m=Ni%IaW2JQXHcY~3#)M0 z^9>4%$B!Q;c&ZiGn(zBOc^KMrT4{?A zHfMK5=H%oIzqJY;8}q_XyL-TKtNgd`p8)5}YioI73hGCbDa9TcLX>QS?(zrr(;xh+ zKkWo$8iNIi(r-_M6>JKHAtBAm$r*I#j#^~r`RwXXsb!!y+CF~d^ziunbII<+K_CPg zUep^i4<0<&l<&p_nbA^ozzGcT4E|P$vSav+#-U+@@!37Sm{z^)E3k&0Mpr{!V7GmH zAZnXS2v(W*3k$`7$QY%*Jy7E{yV1@S2=J&Q6Z>$o&CJ-C(*ExGW&G1%*0qHjTDkO6 zss)6Ewu6Svckg1?(A3Pq37f5pJ#B_mhQ65bV4DXcCI@S&_*dID(r8&L-P)P)uxB8g zE5N(+$^Zyp9dGXyS2kXdCr_h3>8xyOLx#L9*?9nPGOJdJBA)a-PvyX)jv)45Mztpj z%uXxPvhZC5f#768J9Zsq^IgK{<7=fT9Dou}Hq|sXD;5?Jky)~Y8&S5LfU%S{98r>5 z#pV@lb?Wrd&po(<>W4=ZNNYT#C#^VL-WHWl&GjuR(1&M%6!}_!gVwID`qrb*c-c-n z?boBppN)%S6BQMG-O%7J=aq8D07WEU_~qwCO?c(t?`kb}L+up+DybU)V)fekm9M|QKP)=>hP#BCnwqSvtdx=xFRC0D zl^rQFjAUZ%>e5WYA|uP;?GXxb?j+S%7&v0b&iv?N~g+r>mCLJD#+^~n@K~>Dk#bxHWevadzbBdQa zyE)_nzv@=o6(3i|AEU0ESNy$BoZ{s*+4lMKXFc4+9Ec9aUA+ZY8(^fFtzAb(74)S{ 
zH-cWEq@<*@e|s~EM}6Ju4mZ`Ary`Tm;62pN0{9Y^pRbNLmy(yiR_zdkIu&V%V07g^ zTp%2_mmm2tdGNMVqad=n@+2uSKlb&-f*pW|?#Av3=(%g-BS4tHvRzunvu5t%Gu6q~ z_lv;Wqd$D#zr(7p0%}W2PiI@OU;z-cl$;zJ4h<4=h>wpie`1-*;<2$Ya9E6>YlPKZ zrLez4fS>_6Ku{FvfBWVh%$)Lpm{mQODS(VUE9wUJV^;SLECvQmy&nqoP(C2iK-7;A zKXgNAgw$+#O$`qSs&b5kbaopU83iNEgE**xs3_tcfjSQ#KGeqXD7Wq>bM_CsopiEU z!mFx{c;@QnRtk;7r8}!!xOlN!!K2?f8F8ktBOFu|~ zjdPJ#Qxm%XZ}rXlB-|9yE+uai%vDxNd{d?{B8JUN!aEC;UzG-NR`F){k|n`k}f&pZ0J zxBe>W{N~SBy1)g&=(*DlJZ~i++~BGk+wKWgR#tk>5vkYv<(d5X6S|Hf7XNX{F4qxt z#|I)rT_Tug&Qu#0b*5|n>(y$>pqlZVsq@DRfBrXm{jZAjm#&9)Ddheeu+Vsp??OY62_zW7Xjs49SwC?7-JJ#svL! zk!gpfh-XI!Sw1#YDj|?7C6UxIycBsCr zectptDuG*h_~=n7@d3{h-gZ*}@DkZ3OYIp^*IZxfjGKvnxm&zoQ(`M5ZvB9d!EV}vqXeZ3 z?_bWBFJI!ep00!tvL9SXaJigd7Mcq@%YkAtCJw<>whbMS(A3m<iaU&yUn zDkzhfoSd9+*$&{5K?$CW_dqP!-BPTHE^6?J6DQ=eqW^kS1{bDwL!GwxY9C>wI2w`- z*bShVQO7D=rZf`bfddD?ykrgM;|d58mXnhcUb%lE0T^gBmXwz>Y5kbE)ik7}qyzvf zgf(rqZ(kI^ZFyzojG5eGewQxIMH+l1vh5@QSGIk5HVje;2?7&(1|*yuC~*^XP@Mzl z6967IMr;M6kdP3k$TC*&X5~0ho`dja`0QPOKiGx?APjktpq3D130td@SFbGk$58r_ z3PfvSkO_~m6_6Z*05gH9N!mbpoi?a}SR05?4;HnpwbhSY-@}Kj6pyeltBZ@v1P+lC z);iVWN@}XKtSrrW2_7?C*?=KrkB@=I*Y%F+trQ zhK48vFX0~>#!y8aPe|az1vM3Ta$wh}9~xeW^%{j|)QUZGI!4oHZ0VuVeh{R#U9`XGm6>MaQFc# zm7IeEHVLZ^a^t;Fl!u~IL4f_i#8ancqst>TcPPoHuFEUy73+pDpB@nS9t?WU{p$8P21l!Dl)z5eFM5wmk>7eqxx z;YKNJw&oELiNF@NgSEIYMogb*z>2fvD+W zy5WAkxOnw)rv~O6O9iLTgM->&_Ku%E9gN)(-0?DV8^`wT+uy&-%gY;sRT4!3$)3oo zs;ZhxZfE12D)LnGV#rd^(9kfpwzhsR$lG${+l?ox42l-lL3d>5<_3ce*>Lg7mHGJR z*-y=ejwZtSE&Jm1w(W5`X&?*3;yI*UVRA`7eclW_bnBu+0>nA(@mEoK6%~~Yi5hcY zE>Q-wQYX&M%d0xw3v|>b<0NC>zFopoFxaqxhWh-p$ih35OiXV1>&S?p`Wj!JjUj8LoJY-)N<;~gq#*MYa11UmvRyH8&9`LynI$G744 zfhedR=N|m{nwOfLXHWupbx}^%L7tNps>&H)!cmT59E6Di%Y9Ab zDflA$!xie`G#RH1OYf4VCNaR7{_UvcD8ItOLiuM~OqSa1*|X*SU0YlGq&y`H0DM#R zN))`nrd}f-=Kzy2P*}|~2inqk*CjWM3$dyGZmKP`fl4-uWBONxc%1kl;rCLhi-+{nE#e zIq)ICIOwz#`?{g&X!s8H;PO5`KDbP_9Lp%IQ9HPZh8O?RIi9HD9iUh| z#)uSE^YUfLg$v>sV2D8=v9z*!eBEcP0AB1*5;a9}D|sR-&^ob7T_j_UUR&9_i$}S^ 
zcI@7}w;XM&g2_~w|EpZp+GT$7f0wIzjCNmJMd86;ivvYYdEM>PH8$p1zVCI?u-J?l zGbq2RW0Dr$O&M0I^sDW&dU0nhysc4c?A6%iw2=REqj7ySr;}5k;Wu=42vsk(>HsHt z%euJJurTmPMzMp?24dkU(ak27re^*#rX#(Hkrg1_sKZNfH6%Gy(DeP0tcsh!Tbn<2p!>+P;h#RSVd5J5Y^~7O3=R zn(2xspP=E*2UD%g<*UKE*`sNvM2-=Q* z%hKh`D?yNAI3onG%K<4^*#lc9zo5VY8_dAaFyP+3!@0H=GAKfS#%KE-JA&{D%g{7L z7~TEy`W!5(t94@VD4=Z2$1@^=gIP#}1}R>yNjdq7{27ZUPoJ8<{sH3tbQUW@Yif28 zAP)ftfUUzuKLBJHyzb@($z)~DPFC>s_4V@hCfk8RT#rKe(}os}x?6itlTCs^#`3z~ zPIFF2jTCkyVN&gYrwP4y^!psb2iE4JphOcbrJ)_^nia9Jqvgl-q(MA6g@Mn}HMd^UiYtGm0dzJACPJ$4+ACJkEB!UOYB zQdz0rFbeQb7AJdFJs=dGG7Zctsa1Mzw4LF?+i`^f1*17$iy6J^w)Z9QUWU;FZ*DE1 zS|KDMANd}0f-Dqm?b`db8s_Haq#s8X!>LoUua07IsGYZs1#$eKva&jNpz+Qusb;M^MU|A6wnaZ%GY#a8Uf>-u zL&Aa4lLXDfd6gBcC6DVx8IsWTGLzT^&J3xo`nsb!w0A%>lWl{3?(-2mG!5@h%t(bN z&n26Y+5lJ>2c#n)|7vNjU4@>P)p_>b)IA9xWzQP<+D|r%g#erE*TY>pC5$=>!UC0W%+3O`h<0>e~PmS3hxsH8bRn< z%mX5bt9CeIrman3ZYT93l7iKtOpCKB`_b_n?Qf2aiwno6)#YK{V~cH{l9Ez$bLV;h zr)EMoi|Vu$q|e788bD^0im}gK&O^LTi&0hsTfcmQ)ES#bne@YF98< z7V*D;4k-<-)9c(AkDCW&4r=1-Hbo-%vdnQ*_uF{~pVgRL)=pE1v2AU)EA;OU~ zYe1RS#NNGF_6rv;4#v+!GBUy3)a%!l8ouMRQOPQ+=^)V`7`O4W+Q)tVO+wK4vQ}0>M`ob5`RceTxpx)#qUbomm2= zi)kh|{mrX@Y3z7D#N*vlw7(c|=PNKPl$4c~H7q7Dix)MrnYZ=MQ3mpH zK6(5&UA z06SA1_~*ithzvJ$V6F)eQWrpX5Z2FSFJADYBcFai0^o>TIjm56rjz?B$!^2egsw{PDd1|axVetU9!iwqz2G*L71#VpLc zys`3A{83ust-QQ5ZzNjH0POOQOct1aj>c*x2m!@c7ds!oVQWXn=J&5bxEg*#yyF!V zG`}QFrrV0HfW$G(_-cC^^<_AXapufWk!LIvaw&qw**%9R_ln?qJ6?m%Jw}Er^{dRk z9GzL{Q{Ye*XM@#6ZS>9cWaaH>tD9+4^Wwg!B&ym^@A-TqY8HN#)Nv~_i{VB=DX1F7g8bAxFu!vGo~DdRe@Bp4`C23(9pHF>4Z z>cd(*9M6paI`Ggk*tDrCXM0thcuWF{zI7KP(ol&=`!c<4Y_t-$ujiO0p-}=dqP4Ft z8uZn*RbtZ+!!Zd^pGQ_2ZV&opi0m#7pX$D>8$+Nkh{J+Uroub zgn|bY8#M@AUj4-ub=}+cN2WwCpYhv&13cYRd>TWWCs|1g3NKog6HMcJ8 zyN_c9f(KcjF%h2Kui6V*GDusfZ_Zx3YDQC zfu1v|^QELH(6#Q9eBsPs0`d=!I_G*|%dcO#t0IrNeG7^<>X2c zvnUD!7w#PxRidcO%uLnQLRjTD^LH|V^5egZvhn#Aa=#Jj*SfhWA?%S+F~G1^3r|0_ezq8t5J5qOuS!QS_B5FgPmR_$vYr;p>ZOclrj&%9|KMRW&b4vLR<`Su|vx)y`Kk*;ym*dy)s=e 
zPyZTQt-5)t`8A#Drhly)%l?0BWBmFKy#Hxr{nvNI@Bcq_SpI%#O+T1QP8}KQzR8nA P4hpPWvyqmmYZvf8DDj|R From adcfb8fa03e46aef8097a87513df64e901ae0343 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 4 Feb 2026 18:18:21 +0000 Subject: [PATCH 078/158] fixed overhead --- .../cuvs/detail/jit_lto/FragmentDatabase.h | 1 + cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 15 ++- cpp/src/detail/jit_lto/FragmentDatabase.cu | 2 + .../jit_lto/NVRTCLTOFragmentCompiler.cu | 3 + .../ann_ivf_flat/ivf_flat_udf_bench.cu | 14 +- .../ann_ivf_flat/plot_udf_benchmark.py | 120 +++++++++++++++++- .../ann_ivf_flat/udf_results_fixed.png | Bin 0 -> 262512 bytes .../ann_ivf_flat/udf_results_with_jit.png | Bin 0 -> 293432 bytes 8 files changed, 142 insertions(+), 13 deletions(-) create mode 100644 cpp/tests/neighbors/ann_ivf_flat/udf_results_fixed.png create mode 100644 cpp/tests/neighbors/ann_ivf_flat/udf_results_with_jit.png diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h index 9d3849eaef..41cc4dc557 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h @@ -21,6 +21,7 @@ class FragmentDatabase { FragmentDatabase& operator=(FragmentDatabase const&) = delete; FragmentEntry* get_fragment(std::string const& key); + bool has_fragment(std::string const& key) const; private: FragmentDatabase(); diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 948ad66466..08ec3e689c 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -12,8 +12,10 @@ #include #include +#include #include #include +#include #include #include "cuda_runtime.h" @@ -74,7 +76,6 @@ std::shared_ptr AlgorithmPlanner::build() check_nvjitlink_result(handle, result); for (auto& frag : this->fragments) { - std::cout << "Adding fragment: " << frag->compute_key << std::endl; frag->add_to(handle); } @@ -83,6 +84,10 @@ 
std::shared_ptr AlgorithmPlanner::build() result = nvJitLinkComplete(handle); check_nvjitlink_result(handle, result); + // Dump CUBIN if CUVS_DUMP_CUBIN is set + static int dump_counter = 0; + bool dump_cubin = std::getenv("CUVS_DUMP_CUBIN") != nullptr; + // get cubin from nvJitLink size_t cubin_size; result = nvJitLinkGetLinkedCubinSize(handle, &cubin_size); @@ -92,6 +97,14 @@ std::shared_ptr AlgorithmPlanner::build() result = nvJitLinkGetLinkedCubin(handle, cubin.get()); check_nvjitlink_result(handle, result); + // Dump CUBIN for analysis with cuobjdump + if (dump_cubin) { + std::string filename = "/tmp/jit_kernel_" + std::to_string(dump_counter++) + ".cubin"; + std::ofstream out(filename, std::ios::binary); + out.write(cubin.get(), cubin_size); + std::cerr << "Dumped CUBIN to: " << filename << " (" << cubin_size << " bytes)" << std::endl; + } + result = nvJitLinkDestroy(&handle); check_nvjitlink_result(handle, result); diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index b0a36d2f73..b13e368f13 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -25,6 +25,8 @@ FragmentDatabase& fragment_database() return database; } +bool FragmentDatabase::has_fragment(std::string const& key) const { return cache.count(key) > 0; } + FragmentEntry* FragmentDatabase::get_fragment(std::string const& key) { auto& db = fragment_database(); diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index c53dc1eaba..8bae7a7a03 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -47,6 +47,9 @@ NVRTCLTOFragmentCompiler::NVRTCLTOFragmentCompiler() void NVRTCLTOFragmentCompiler::compile(std::string const& key, std::string const& code) const { + // Check if this fragment is already cached - avoid expensive NVRTC compilation + if 
(fragment_database().has_fragment(key)) { return; } + nvrtcProgram prog; NVRTC_SAFE_CALL( nvrtcCreateProgram(&prog, code.c_str(), "nvrtc_lto_fragment", 0, nullptr, nullptr)); diff --git a/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu b/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu index adf4481d6b..a411f68d1d 100644 --- a/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu +++ b/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu @@ -452,27 +452,31 @@ int main(int argc, char** argv) // Write CSV std::ofstream csv(output_file); - csv << "dtype,k,median_builtin_ms,median_udf_ms,median_raw_ms,udf_ratio,raw_ratio\n"; + csv << "dtype,k,first_builtin_ms,first_udf_ms,first_raw_ms,median_builtin_ms,median_udf_ms," + "median_raw_ms,udf_ratio,raw_ratio\n"; for (const auto& r : results) { double udf_ratio = r.median_udf_ms / r.median_builtin_ms; double raw_ratio = r.median_raw_ms / r.median_builtin_ms; - csv << r.dtype << "," << r.k << "," << std::fixed << std::setprecision(3) << r.median_builtin_ms - << "," << r.median_udf_ms << "," << r.median_raw_ms << "," << std::setprecision(4) - << udf_ratio << "," << raw_ratio << "\n"; + csv << r.dtype << "," << r.k << "," << std::fixed << std::setprecision(3) << r.first_builtin_ms + << "," << r.first_udf_ms << "," << r.first_raw_ms << "," << r.median_builtin_ms << "," + << r.median_udf_ms << "," << r.median_raw_ms << "," << std::setprecision(4) << udf_ratio + << "," << raw_ratio << "\n"; } csv.close(); std::cerr << "\nResults written to: " << output_file << "\n"; // Also print to stdout for convenience - std::cout << "dtype,k,median_builtin_ms,median_udf_ms,median_raw_ms,udf_ratio,raw_ratio\n"; + std::cout << "dtype,k,first_builtin_ms,first_udf_ms,first_raw_ms,median_builtin_ms,median_udf_ms," + "median_raw_ms,udf_ratio,raw_ratio\n"; for (const auto& r : results) { double udf_ratio = r.median_udf_ms / r.median_builtin_ms; double raw_ratio = r.median_raw_ms / r.median_builtin_ms; std::cout << r.dtype << "," << 
r.k << "," << std::fixed << std::setprecision(3) + << r.first_builtin_ms << "," << r.first_udf_ms << "," << r.first_raw_ms << "," << r.median_builtin_ms << "," << r.median_udf_ms << "," << r.median_raw_ms << "," << std::setprecision(4) << udf_ratio << "," << raw_ratio << "\n"; } diff --git a/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py b/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py index aadcc38d0d..4c63d4be17 100644 --- a/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py +++ b/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py @@ -19,8 +19,14 @@ def plot_benchmark_results(csv_file: str): # Read data df = pd.read_csv(csv_file) - # Create figure with 2x2 subplots - fig, axes = plt.subplots(2, 2, figsize=(14, 10)) + # Check if first_* columns exist (new format) + has_first_times = "first_builtin_ms" in df.columns + + # Create figure with 2x3 subplots if we have first times, else 2x2 + if has_first_times: + fig, axes = plt.subplots(2, 3, figsize=(18, 10)) + else: + fig, axes = plt.subplots(2, 2, figsize=(14, 10)) fig.suptitle( "IVF-Flat UDF Benchmark: Built-in vs Macro UDF vs Raw UDF\n(1M vectors, 512 dims, 100 queries)", fontsize=14, @@ -199,6 +205,92 @@ def plot_benchmark_results(csv_file: str): fontweight="bold", ) + # ========================================================================= + # Plot 5 & 6: First-run JIT times (if available) + # ========================================================================= + if has_first_times: + ax5 = axes[0, 2] + ax6 = axes[1, 2] + + # Float32 first-run times + data_f32 = df[df["dtype"] == "float32"] + x = np.arange(len(data_f32)) + width = 0.25 + + ax5.bar( + x - width, + data_f32["first_builtin_ms"], + width, + label="Built-in", + color=colors["float32"], + alpha=0.9, + ) + ax5.bar( + x, + data_f32["first_udf_ms"], + width, + label="Macro UDF", + color=colors["float32"], + alpha=0.5, + hatch="//", + ) + ax5.bar( + x + width, + data_f32["first_raw_ms"], + width, + label="Raw UDF", + 
color=colors["float32"], + alpha=0.3, + hatch="\\\\", + ) + + ax5.set_xlabel("k (neighbors)") + ax5.set_ylabel("Time (ms)") + ax5.set_title("Float32: First Run (incl. JIT)") + ax5.set_xticks(x) + ax5.set_xticklabels(data_f32["k"]) + ax5.legend(loc="upper left") + ax5.grid(axis="y", alpha=0.3) + + # Int8 first-run times + data_int8 = df[df["dtype"] == "int8"] + x = np.arange(len(data_int8)) + + ax6.bar( + x - width, + data_int8["first_builtin_ms"], + width, + label="Built-in", + color=colors["int8"], + alpha=0.9, + ) + ax6.bar( + x, + data_int8["first_udf_ms"], + width, + label="Macro UDF", + color=colors["int8"], + alpha=0.5, + hatch="//", + ) + ax6.bar( + x + width, + data_int8["first_raw_ms"], + width, + label="Raw UDF", + color=colors["int8"], + alpha=0.3, + hatch="\\\\", + ) + + ax6.set_xlabel("k (neighbors)") + ax6.set_ylabel("Time (ms)") + ax6.set_title("Int8: First Run (incl. JIT)") + ax6.set_xticks(x) + ax6.set_xticklabels(data_int8["k"]) + ax6.legend(loc="upper left") + ax6.grid(axis="y", alpha=0.3) + plt.tight_layout() # Save figure @@ -210,12 +302,26 @@ def plot_benchmark_results(csv_file: str): def print_summary(csv_file: str): """Print a summary table of results.""" df = pd.read_csv(csv_file) + has_first_times = "first_builtin_ms" in df.columns - print("\n" + "=" * 100) + print("\n" + "=" * 120) print("UDF Benchmark Summary") - print("=" * 100) + print("=" * 120) + + if has_first_times: + print("\n--- First Run (includes JIT compilation) ---") + print( + f"{'dtype':<10} {'k':<6} {'Built-in (ms)':<15} {'Macro UDF (ms)':<15} {'Raw UDF (ms)':<15}" + ) + print("-" * 60) + for _, row in df.iterrows(): + print( + f"{row['dtype']:<10} {row['k']:<6} {row['first_builtin_ms']:<15.1f} {row['first_udf_ms']:<15.1f} {row['first_raw_ms']:<15.1f}" + ) + + print("\n--- Median (cached, 20 iterations) ---") print( - f"\n{'dtype':<10} {'k':<6} {'Built-in (ms)':<15} {'Macro UDF (ms)':<15} {'Raw UDF (ms)':<15} {'Macro Ratio':<12} {'Raw Ratio':<12}" + f"{'dtype':<10} 
{'k':<6} {'Built-in (ms)':<15} {'Macro UDF (ms)':<15} {'Raw UDF (ms)':<15} {'Macro Ratio':<12} {'Raw Ratio':<12}" ) print("-" * 100) @@ -224,7 +330,7 @@ def print_summary(csv_file: str): f"{row['dtype']:<10} {row['k']:<6} {row['median_builtin_ms']:<15.2f} {row['median_udf_ms']:<15.2f} {row['median_raw_ms']:<15.2f} {row['udf_ratio']:<12.3f} {row['raw_ratio']:<12.3f}" ) - print("\n" + "=" * 100) + print("\n" + "=" * 120) print("Key Observations:") print( f" - Float32 Macro UDF avg ratio: {df[df['dtype'] == 'float32']['udf_ratio'].mean():.3f}x" @@ -238,7 +344,7 @@ def print_summary(csv_file: str): print( f" - Int8 Raw UDF avg ratio: {df[df['dtype'] == 'int8']['raw_ratio'].mean():.3f}x" ) - print("=" * 100 + "\n") + print("=" * 120 + "\n") if __name__ == "__main__": diff --git a/cpp/tests/neighbors/ann_ivf_flat/udf_results_fixed.png b/cpp/tests/neighbors/ann_ivf_flat/udf_results_fixed.png new file mode 100644 index 0000000000000000000000000000000000000000..9d01cb4759e03f9cf6278212cc09fc59e10e3640 GIT binary patch literal 262512 zcmc$`cRZJG|3Cb$kc=iJGa|D@lod(IsLY6rGLn!@GE-?;Aq`nc3*l{+kR%zIMMy$c z$QDAs=i&3Ye)sqK{&D|#-yV;vt3KuJJdg7@Ua#kRo!7OG9%5!-XP{6h%<5_fbSV^i zehP)Qgq{xnMa$)@CjL*(?ci}YJ;&2-o~K-_DMwDZIoUh9+1pxd@vwGrwRJqVYlp$ zhF8lD{P)-7*r|QX*Zud`X3iDs|NC37efZ86zVE-k^8j6W01e-Ne@V*F@vr^w54%+& zvE}0b`ZT7{Wvl+zhgmr>scrf1597OJ`TxzO^(*m-i7|=X)|ftYprvnQL|i^P@X{s5 zS4KQizw}BOn3$r5YZDT-7!@u3^5)GOAIEf6C#Q_my^D*Bv`yUqUUntd(#`86CVvu6 z)P1&gIn%685?TZB@w2}F*&>Iz{(qU?ihwP4fpii z9(4B?3gwi2n7@YnniH%TznwYx=-!Wg@2abra&vP>M@RQf{rp`2($(g0%x>oLFrMau z1&!E?bBhD5sw%0?Z{IGjsi}EtoFj5!vj3uWRWz@pq-6OZ|6HA|yr)7j6@ubgXt)p?c2?@hAioyl{>xgEN5^8>B#!*1tp=?eBgo5Swm z;UT2=n5i|_;pR)%zOI4BC$#O8%bz%O3@|uV*W_Hz?XLXrf&TRA(;1nW%41z+I>{QW z*^V9D=Dt&`)5C37v$LfNThnVtm>m3>y7>9{9=;7;+AfpIjP&&3$-`}VYaBa@KBvq- zKbB%;`!QzEp-hW1m-gLrN<7`nz4&f^0Rdh)Id-i$g_54CJDKi-2Gw^ZR)nxi?=QQ- zI@WCN&+4|zRe+awS!ZWw{)H)#rgZZ#U+uLPk`6zs1?Uz;nhge1rR3=9G&6F3LwlM!NSJWt>!2%n4$+M1J_TZG4iF 
z>%+MBzP-Y@DZ{y2^>C~ltMu7V0qN=Dxejdxbsy)oG>#lOsISlRtD`tgw07UoXkBsdXA|l^*)~asz&=I@pw^^ITuJI#zy@u&^*~ zyFPZ_!{v!P`sv7_>v`W7?;kqWUA|RVSpBf((WCEQxet!^+%f+t<~{Ol?*my^)o8PD zm1B+N-Mt@U*mjoXH#tjK5dw-@X&F)^uoc<^T6xz+V6c@-78yUW9_ zrloDCV`h&MAGB{u-JF`1CSaRm*IaYQy-#&^yhp-k#!3BtUY_T&oZjM|vYVT}WSm-Y zVd`gcz5>pbyvNXoJ_DUo9@9fcZa+V*xKzsKc^U0H4F_8*JoOmi#4A}F-Q}W_ZWY^qO81p<&l`(pYN4dR0wM|9T~8vuY`m!Q?_8U2_Aj$=F-YFR z6?*s0USLW)8Y}ly@5xc+J>5!noo}xguUNlP+${Lhr%yiT*0Dxmy&vLlDFGios44i) zvGmo%s+gEO5F7cs_*+TAZ=S8db0jD;Gt+F!W`29}&9pR!Y4*i+cDPy}AD=Ds%gqWe zT*3F-E`;p1|FZM${remDiT;g|Dvs~&>KdJ!vU?!!x!UpD>svkY?mt<6Hm8T-rW4g} z@m*++AM*^_jZIK%mUNvg@w$JPcoQ3&5Tlxm=P%A*UteCLT34Oj{)8pGuF%`_ZcL2L z%bMK|ty_eIgxVaWFUz_AJc*T|rJmF1>+cuxUZbn0mucI$?eN{5%Ww#9i50l?Fw9L2 zUS;Ok-Cr0Qcp+?8`krmu=upi{rv{q^25f2`C|g+YRaaM!XL%jC(l9yD*xAz~P;?Y~ z_({CqjTI|btBvEJF#Z13-o4?k{+U?Zj!kXcZQ3jE-oNjvRr)KIV4$OnGZekBP3`dE z15Qpmw`s)%{~qbkj2`YN=7|*4dXN3`$Z$cUI9V+Z&qPzz1v_uw)zw^99n!aM-IDR1 z7GvLSf46q~{D|(H*hqW9rVWBe--U4O;UB;~C}VFp|NJD(ws~Mrj7x9FX41ZR*cbe! z$f;qqK#d4WNl6j&{rvoxan%DkcLx0IJISuEg4_&Wn3RkZ{|zm8Z148L-J%wi~mciQJWO=6hjQ`26@GZS8QG^85(&ippNTHLF+Gl%`dlc>n%=|F=T< z9j32nczAeP#&K0$KPn<=>!@Edb8}VW6@9G+0=ImS+bZe3FeiPwHZBBD=S^wp3IhX! 
zjEszct5;XH<~fF;VDl*|n%QvSCmJgWKGBL-?I`l?92i)&Yu7FrS=mJO$Za!!Mv8gq z1I^kzaks(Td)eIwo30cT$X9>-c*@R>(b3US%C3oNmrcz(ob##Yn%xb_nxvGxe*OC7 zxpOZJ3*NqcYt{Bl)`M^B*0JHdj)%I*8W~h-U`mSU{XLGMDBhYsuZ4t^;h6>h{G3wS z)U*-3?Ws-@9WFb~RS6x49!0n$-?_Uk+g49M_4vAjH#c6na)lOG(Am`$cwgGiqAo%C zOR>LFLqkJ$Q?i}Bg{7s0_q6?|XL>8KW3->?UkFi)Z28q*aO&K-9Vf~ImaX7-s5-y8 zVZNtX_S?I6G}uo1CL6iAcW5%Ax91!?Q5W;VlJAN8&xnq`@$cVH8u?;@s6K~6IdAne zx;4EkD?3&5Kz4L$>dK`{D{vV)j}LG5neDkl4agjl@ff;mAImCg@X4t6rg%~K?b%-i zBm7?L*RLN%HGYU=Mjp7mgF~W0$|*<3Ff3ORN&!7R{TmcPCsa?)JoA|9i`I;rc0~0U zy>v;oVo4aHNxBjgWP!hAldzzfpP$z*^6_F3)}qM5CBj3{b)UCQBzKQZ-kz3J6s7n5HHJg#5xVSj)_U%V9e}+YaZ6i~Da;w*c3zbb=YuAP+y~4LV&EKw}ac4)`UBfo>lP43_^6-d1vuw?_ZF#;T zAj4;N+}^Pny@7R5&Sxg0NwQ_uo|cB@!#FYuVXFqQuS3?26c>&(fof8JhgYfuOoSn46mu;TC;&G&18#K%d=f63sL5MF3ickL%7JQCvLxf zPfoJzFv;I?{`~nf4lzkdNls{ToO$W?EmW$Uxaq4$clSt0=r}n!h5wM=y?bMvf)`)! zOTUFzU7m$6iu?QfaS8?|8VfufOV9y*9MBD3pC8*V78xDAZDwZXgW^_=Kj!Ls@Zsfv07`6! z@_i*nM#fW3sYb^BVLwIX`?y1fd}>8~cSMC21Oo%T_`O(D;rQ&UjMw-hd*9olhE4a) z2YlKe9%NLH^P1>gbvDO%q;||E-Z#2?+V0PeV{&=lUVG_Dj66OP!!o%0(^x)wozb^b zr9lTen3kV2!eWulaKf2l6?-BF5k3v$<4bpA^yj3;) zo$ccR&Sl+q(`Cnw9Y>unMB)^wq(#?K?%cnBqgL?`Q{xk@Llzbm(No1fv+L80b95E9 z<;0y868Y7w!m$lX@4aR0+1}-f%-4uH-EmcIi-pmKB~3myLa#UO&_CNzw|qcM^o7U z^<~zO>(s|R9VhXf#_`|9=}~)T@JUt!@jm0LuA0=S0LdThejmabf35la?_XROriZRa zMMZI@S-uHaMwLo-3)4vE*z`m< zrZwzRYV&`AxFt?k-{p+oGaiwvV=i*+xmEaF4p$hubo7xuPD5WLDhBV~Wh=A%*8C4U6PWzoY7`6z(iKEOe1KY<5qE_D1BX3;>Dlt;ZZ75XX9NTT2(}h zPEKBqjNHJ<#RX0}+WV2i+6o&IR$I)`fB)tY1X6!ki~C+k93VPT=f*-uYRawJlwR1X~ZKH6DIUG2PS z(AP}8v(7pOhkn;4{nDWBh)Y#;J z-cU!ed9>r={A_<)UON9Up5U407hLG;*0Wq(T=gjI`LA9PTD3{s>>bvOY9IE#y};A; z>kbF(tQB^4cG(VX8*OcEzZ81!>FVm@_j(`7#kj?{H9UIziMJIMtN#2RDIaW37nn6j zGrod-QCd-Y}%E!-7L!R)DLf5|4ehX8Ehwn?Nrw`Y; zyG+l1I0S)Bcgb|p<(KrR+KSRfl zA3s?UAwXOI+GIAorssZlE66+!V;~;M^sn}txF5Cg_CoJX-qSN*o^I4c` zRy%rhV`I|`)0?ucy&vTIP`tK^#yg9I;aGQrz(@3K+qUh@mkeG4ysWG)kB&NB4hrH! 
zEw{LT=j2M-=37^IB- zYL_=Dyl^a+2bX;v8~362?_UvK|#TH<>lS86MginR^5tq9M|>nd2R0QGqg{T=LsiMa+L$u|8mOFi>f6MkU&BZqn9mph2K&e6zoekh6=+TQCvV z&#L4!Djcg*ks@HiHnN>pbd%WeEmd|lHi9u4-?M5JBYW&xsL4$pLOIIuN~U^r{h|e> zGIonmK>{CZqL|xq9X0?KXXlDaOCMuHUrv1ijw;eyTl43x=@sWnI_0&Ov7DkWEYW-; zd}KGT=K63!s3-M@TOmr*r@3#1KC`;Oec?a$pmjo;5}1YTz^bHFcws8y^E0VnDM}~K zJqu`8nGcFUbdSz6eX1=3&OplXjt{rCUdhjwX_>2yl@|mh)C9Fl@x-A zsqQ#0KfgX9;0l{rH0CjB-ZR}{O4+%FYsWn+gHjho^q+1qDeyRAS9ry%GAKAI_hVhD zTHonPx{neY*01O5^&acG(&fgRbSSLGnM2<5mspDU#HU?YW@@d&Exv>NdhMx3S*ixL z=x(gdn>TM39%{|e>~Btw?|P|Wt`X1kvG3vjD-02n=-Ld4iHQ-5`LN+Kc}*w7cn;&0x(Ike|l5kq$rbKn+wrN8r8U2>A#mjV-f z6dU@)_w)_rId(AM^CRXoqJ((=JeZ$ubEHm%@+g@_JOP}DAPUJQfD@Kx@4t(_r%)c= z0=Bq*^>FA&hETy8w1#H~6UXkuGecRmm-!Cf_%JRbBSXLE+}AqeJl?SKRU5W_7MOyKFqMNJAg#H&4PQrUhZBbC* z1Z3@sJgT80V4m;9Yu;vbx23gJ`Ql9X8eFkjv_nS`*Vx!t zL^h6EiWP$VR;fnS^-*jLjDWXk3T=E1A4G=a8$)bo$DytYn*p( zd(rgdJjs&vos%MhUSDne*4?`e9#|aEX1hO+ex#r8wYAp<1_n~?Px$3~O>D#Ax(SiB zq_Ht8A!b77&(D-}ewCa}AZcjI@75^IZ>hKwBYc2vHRrYMCvw#q4ARZE-+OL@-cMb( zclnAH0_8VjV`Ed#IcOQSwYEOQ13iN}5!x1e`_|S&79YZSy9R-zIti5QF4nxWL&!M$ z>CmdPrpDQ4mTmE+xd}<53v}Q-2tWE{S%E$_lTJ_wVUIKo8(}3wU+B_PT`g_z-t@ zA@5m+XmznWxtK7^{S|1rWxc( zCN4RtypM)%{dK&Ya&Bsa-`o82JWazPiMzXOLivZkWLkE)A9yd!iCWHI z6ngF28;FY9IcJaIrVnw>YLwR2YNdB<hWo*=rK~cCXWxL*Exb=1#@58M7@4Jfmfu1DV zzkO5m=dd_QKts0e2GqiCXah!zsK|qR#r%DI6rj6YhLrlVx;e+r5FK5u zfoKumzi;`&bo%wP#s2!OWiKya?>;m9Lyc(k7>IB-^Q7#Qmc9ZSWG^!sTtd(O>(?)! 
zSpt?In$i0&&I$sWPoCXGdGz>k7YMYIvvY-{ zG1FK23M;MRBF&O@E97JWg4SqiYF0#1z^qI0c|$k#;&`!N;n1GDJMPuSJUJR0(IX)(T?Tm| zt9_dQ5WuHdUw7!eMy-(V7yzt{E`XJNA0FO*v?s~AxB6a~$d<&6zfGIb<@vWPQ(LpI zj)wp8l`EYnArU=jdQnF_p+Pg+w`QyR03+JrptCC~DiSS4_njN_ zzD^~BC0&{QAJH>u{gUYUCC`zEz)BP*qm|1jZX@4J$4*ms;OJION9Jj`I@;N;s3%kQrr)wPH$k~DGVV=SNr592o-7<9b<3-;zP_HIEkK;fvs$O;xF>KXs&p3a zq2s;OrrWCEGo$U39uW~Sip@GdR?c%*-tz^22Tu7;yC$);tu8+HAa%EUJma&nvPzb( zhr-Xg!pg;!Nkl?Yk|;|89WD3~Uy6L?iMa%Rp%KSKNAdUfH#(G9ShyFI^eV4PkXlV) zx>;dFtk&nV(x_7x9gAk^fahe-73_$PjwX=q$MwG8q@$l-n98>e`k|2gY)VUEV~W0A z-M1;(H6I2Ov?r=*UPNk*o<@hWMxPn8mX?;$zQEj(#8kGQd7+t^&Y?}|A-NrLFJ8Pb zik?E9KYskKa$6**$wzIfm3qZBAuz(X=z9zhHRl(8cTg9iWS!{Mii?P}>HOovI*IMu zw=?f?{PqqfT)+!Oc-`8y0cbmKaEBQjk4hogbdHRme9o*`LC2(BZTSIPO37wsZcd}d z6FQYD&^?hlR_TvUOsK%l$ngE+L3snoM8f5V+NG8JKB>*L0Fb0)`Cc1WvbuvkCbTBU zwo$cd^Q{LDf>DWip~Jy42*2Iw#;gk+tyX8}a_yb5{jG6g4WD(MI~er})4O!zJW-{f z68nr-2-2N6WR{hYu`gNU_F7KP)Gw=Ud)>A4J+!~>VEmJ4+yrYQb9;S+saO4uJhY2p`tf)y=S} z<(9JlvK9oU!g?@=$WBcQwV>Q0(b_5vrl}L<;l-{r%GwVU z-?X)z82o+8Pw$yN`8oVYcExgRQQ~Q9YrC2B+}g^*A`sGHS6+vo3JRioXu#<7@QL~P ziQ1=z>D&*Kl9)4C|JpzxT}wk;Jk-i0`^h`7-_U$mK&te`;y0CB_I|(k>688g1uxz8 zk5GI<0H)rcryQ2|o0p+c4yWY@t0V%o?77;jZl;tiwacqXIbh&eGIHw=l|d{f7&{pj z6GMt&dbE4o?99xlsy3at*=uc1k7Do_a`JIKy!6Xyl;0Hj{$WVC;?f$V*gW+qwn6g5 zi4&LhT6=*~l*LQ}+g*9~?3r@Yt$2MAR+up2n#KNqi+IYH?z0tRic; zZkomToZJs3x0pSS(aAW=j4&$ppFb}H^#g7XNR>-Y+ylClPeg7)dzB&p>g8E2KU;!O zx1+{woJ{`ok`+oNioIqjEIj7seU%bx)~q3dPDHN%U%%JBe+-%`2KxJnA8eOa)!e)Z z&7mAD%&Gf5Gw2&F#A{O0t&>C^J$gh;%AmIJ!wP%%md$qTa`E&$L===`hG)+{pOJD? 
z1*v`eOz#OcJ@4Y*+d-(Larc0!?updPs|RdFmJB_}7RYjStXUIWT-Gh53Pk)cn%tGq3m zbUf37&S)i$w9ct=VGit){-h0TY}dd&H5+Q;mD0al-vGT>D@`PycP2~4mZ#;*my@X3 z)YZF^Up~k|8I9Px{XV|3R8M!oy@a9GMQHpan!hE~)f?8+BOT(J*kp0atrO^YlS3_f z&Mh-z-OSX`zyzt=`!zIf{K!d9Pq&v_2fahg?Dg?3f2~%S}=Z9Y}6?X0%P2#?C=KGtA5hIZxOGKQ=_ZYs0??AJ7*TYb?t!?3YQwJ`M-@sj3eek1vDex2rr?uw#wuW2b4O9u1!4KE*EJ`aY z8NfuA)=u=OSH#`BLUy7j^E>1~pNU0bJpgUw%CFZZSd#3spBb>FH#UNxrp!zh{fJHs&AaUeSdBCw4)-kI2SzwPQZ5|7yqR*qFqJ zC=OgODVi21`>y(!k|<4-DJL%p#wf}id#%K*4^joa!c#07ZP4bi9aA&Mz z7oci>XxdPz5$h1-BT@)5K+T2&YJoMQPR9Gqsk=n#8*;VWg76EYBnetLb%Bj6PZ z(0#`5KieYfhs;6QbW2MMP^To1=DrJ8jn15TIx`3Mdso4W+R1UB*~@pw-#YLF<>_|? z4c;%L1ou)D;PURhF!=^&BZuxj^aYU}nxKiwH>bM$`mVzKdkX>WpX^9UIIjxTG%R=B zMag(eg3+&U5Ik)ZpO?1>+9a=Lyn>e`*1QDh!s+S6kx z(Or`gTUCR#72;X5t2=nfrlnMZ!XzLZbED@k*zeL2@tLJIQM#+O=2m!lKRAoeGLy)+ zK*}Z}mwoR1r=1cKw}O2r6yjpjD4mJs;NZ{{G8a+YNSzn1LQ(>(CS_rx)Po9j{s*-m6_Mp`2v~J>l+LjIH z!Ed13ZZpocUnMUu|Mj)k2K>m628CdEx7*!@8lS8IV`heu_EcZwxUq5XdA4a3$5Zh0 zyEYi^yKv|LWT7!s;UwU0HMA0lr`Dwt%lARH$YOmk4nd7rp_GOdyGfP;`vI4$wde;r1Ew#&jf3ITQ;E577A+_bt>Xf?R{xR?C68&v)Zt}Ce>tJ=iMdIhKZAv%VdhQ?dC0{ngXf#J=4<7+~$ zUtcvhJxt+YwIjXQC zc=h+LE{i&cio?&6b{24Qa%#VFvu&AF1h)nI-%eI!On%$M2aZ#|c)QW|P6FqCk_2E@~?uunUojugWfkbc$UBlke{LCMIvK+=Sy%?b%G z{B~YmUhTA(ifby7i?I}h5q5b|)Od0%aoIN-rP0^d`KJDpapXxxn&wp3yX!Y%mDy?A zFCJsNckkY*nbA&a&7&`0j<<|{1R^a(ftq@rhvz`833>Wd2+!Kg$HB(tPWMqUnA54I z1sl2K+`^f!_;tReEsF_ZZ)$2npedzEusrI(p+ntBp?wy0#L0$%>{2FplX0tWUJ7V9 z+n#d|#1;xhib??qt@b?L%Ey=eEwx4qm>jakO0X*4LH$*0)|6w77@u0xT7J!UrUqOF z@-;H{2>rd}1t1Bv6}A=>+zpMi$}rMaK%IZ={yb_v!=*=~a9mbSPGq%?=}Q-DDTh}5 z+>VZ7fAME+=;C+|2)+6KVt=4QO7mJ~P8n?*kgxi6lA7x3C8Psiovj^$)?EFWb8yy{ z{Oo#1Xwd?teJkeTE$>U&214OHHQ8TJwI%@=_gwZ?ARS^t$3zpOS@NO!X|i{LbzO6Q z;n0#oLtSSF>F@Kv6H%h z02(7B;n%Lw6LdfJ>~}CEnK6(PtAaFHcvC{vF?+N&zLo;rAuU|NoDDU%jRv&&VEvxJ~4vT%y%D*ZlWT;h?HIA5zAYwzG1gXH%E3y6=}(=V@lW~ z^;bg^@1l86>bN*N>pajiEdBP4n@irKx|q&)-|3nM*CEN-nZAJ2_Sw+<6b{~nW?5W~ z{!{IewI3Uwg96APVp$m_oaA{AY9>@M#v6J*KMcU?NM6JOF(W4z7s>Mrv=tp4Jou9y 
z6BQyPA>Ks`(yx!2NXS~mcwZz1OnOa$N3#2Vd}W-vC=h4{X<{vzrB`)a}*Pn zrh6JZ3N%+U$eXTVg%E0#DC2z*d*UMFa3>wOzWzql-?>5YSgoMewl+&>vQMG&m`O`$ z&N`ztM~ND)W@eYtHaTr+nIv;0Sk>ba1E@>TYOWg~*m5U&7#d*7NVxTD$9kc&UBa^; zFztgInPJzw1ID~U-p8ul;6M*#oR{ONe;!;w3bh=}`b>sSMA1waqkTt_DW`)P4-`_8 zCz`PGpPietz>z$jYrh#zIkQT@(?M6c)U1VvtN(?yrp_9DjBf=!6Vp39Qr0`}Kr@D=y?)hz zuNe+)yHm{y+5H!$HoY);83=bJ|K-a>C@5@lZbI`53q)lo9L&zlEP*q=(|^&2RAA3v zUzOe5vYWilyFN#Op=V|;LzBzINsv0w?*>|Bl z932Kb_N1fZ8mLm|y}S}#dMZgfNNt={u|9Qb8G1WXlyuT|O$5f(Bd!r^{pqYUNh2aA zbP{ku#^ne73)6xiG*Tp*N*Ws0!y_lEAC~DlT0jz->g=3VGFsKqctwITEC3&6Tzi?| zU4hmu)6>(#Z9?$gfK&r=0PaOODO(Q5DQrf|csD;g;XM9B9V&2XNy##xS%PBMu3f8r zChmQ1cJ@2mDiKwXIJh=|5CEL;m0$#c2<2seXP^0dFD3^51s#q7iEq>aqe8{hwRsYa z=-Ljm*VoWl$+kE@Gs+tr9883sY`f;wmhXcv-n_}^2LR&pb^Wt>xjX)65L3MjdibHi zd$OOzIMaH-SjQlHL5rqGPbx?GADbDo#FcJ^+Yg}-p_G01B(1g6&Gt_I{LF}Cg1Xr` zjX08#0w}lyEsnIiYd3BLW@YVqZkSHuoeu#FZs;-fH{?2;P&hwI!L^kF5jg$&D*GC7 z3uy6ZbXQt$%pSrj{g2l;;YtGRQ3w1pP!x~Aw-KU9|YjVD<#E3HXpDQxYj9K+v`wC z62OJXzd?XCZPy!K@!-w7=omMOg&!H-MJg(6w0Z8@o83Cex+F75wY^R8+82l|N(vIh z?L(QXRYYK{LnRB?dMNZH>I&(&Gbgmx(tK%YNjyK+1suH*Tl^}-iy!CN5s{+JF=hrO z#b1}euSctYs{d>~&Qg7<(XONQgfRWJ73nPY>-*nh4NRel_wcv>^Xvc5VFDr`{paid zZO&nCAD#;>h7RWK5NA~0bN$+4DFHxz{I@l(AOm1jM3RV#{{DY`9L4MX?-fIflVMuD zdPo210qYj3b%X~@ZFZ>JwC!S~RPi-zSQ4>8C@C-?-($-Pphh0itB#3|Cba()(j+LA z{CGwyH8ra^J?TZiEPc8Pfypu>RbjS`tv)o8}N@?`b0>G*c}Lt1Nr5HP?5g(_R{@RY7$R>ynk2H znt@2uX>)H&OP6>EZc0e!A>bk8T6psHZ_*DYbg+n$EHZ^5I*$AlQ);1 zg^>Iw+IGs)GF7%dQjbLLPONBpYUgu!!?)&!b8~y6lsvtB> zO>`ohK}?%1Mg?2+bai(kE=BGicH=EElGHUT_6vbvV*xLHX1q#V0$Ba&=U0xrP*}|2 z%_>0Z5?9wkp83E58hD!_M3N@05@98#3$N?SKSG}bGcSSWQI<_gBqTB?PfzhT(FD(~ zyQTQ=(2o9xShN@f@xvdH5T%`;^4jrN>JI_cPol*4e@?m6HI9dB)OKCS4r*#krT_0= zrVt7_p^u2UDX+qFE(c;0u*I#>e!9OVEU|U~N+vO=C*}4$fjEfh4ynz%9ltT5$d&mZ z4O;T<`i3cEcn5z-H{z6ap#h`$XwW9yhrIY@KtJ_{N~r1k(khCu-N}#yxHc^Xy=iH> z1a=lfzV*_nj$-|Hs)*HvP{!9DZf@`3D?Ru00=AHmm+wZjI_Mlbg_w;0Pp)3UNc>i! 
z^)D&IuAe`j8=4}+^i(@>`3v*nxVF>FD9Ce$;3Sl`wz88D3IqXbuFuZS&d{Qj;1RFd zn)G&;iY0KCHr%bibMFt8BCQ23O@?MdE5V6RNU$ZMjg!yB{=9;Asj*~ulRGyw_Z1NJ z?0UDVY*Ny?G6BgzglM+|M+k22=|d+Zm==0eOp-&w5Qp?xedRs8M3JvYxSs3}i&k_0 zg)7+SB`qy4DJNhW;>+mNKf_Quyx>E-2yluZmHv+u@@8_5cg z2!eOSENTcb-?2@%tFHlGfxe4cb%`_m4#OQHc%X@O*`(lC0+X;Qsiw^kFn6Lw7|p?= z6xhls6moto3(I9}f~r6l)nK-ZAS&v{U?yZfz*#OHXTGGP!+d>IhGT~!K}qO%wbLGT z{(8s{3M^ZNCxWx|9x^Cn#<71k!|k^dA83~LcLj-t?0&lsaV_^~TMVXvCtOqXoqJPW zE@;U0u=Vb}d+KK5Ci&XYHmH3l1a-#S_8>{Q27%n!7+BCv^QfO0Sy_=JSo0n=kY$31 zU4RbIA&#^Xhhy(o`$R_V0RvEn= z30p=oLjWTV*068%u!3_J4fateXiiC48SOvO^?jgw|C#sd@R?x7DY$v}?p^!gOJ7OU zz3A_vx7*N{br8<0j+v65-(83dBh@zGeHc#|5;__U=p&GW#MI@zr&EXaBJ6)Wq3mO+8VBdfIph2s~ zp15H>)xPpx^B(vxQnn2=B&wSP}TNhQ6@zOfBo7q^sVs3Ai{7q5ajox#aa(wD8S++pddXq z*#*Z-k&%%#3$|(+kqq8K6WLC7sf_>P>u^{l-BUsieURdqsmy2%$5Q=K z`CdN%4Y<4?7nxI}g+28aavmv_711Pg|FiM;eA4y~cm%XKxn;Sijtp4*bu`LrJ$`)^ z2Dr3!An|vWygi6mlYnq;XdYI-=PXZObi=G0@V-&Bo*WMt&0&d=j8mmnUW1=g1S?3^ z$S4jY3qnm#+_()=KJP~wF*nb=8_#h+rVq1=v_eBe-=Yt(7MDkho9{@`8Zhyk89i0z zZqxV050eme`OcPwiYL&$Sx`>Xj@RP8SgR1LVgSF0O7?^ZE|xN)xRyrw+Rd9Az_3LS zX|icC!@x}^a?i#cNzg|a02fCr?I61t53Z+C#!OH_>S2S9vZcbPpCPkUB_X5#lhqT?xwDz7|-*$J7%0r`3R zFIvnDG|MdMviDd>5&{KK;^QOXZC)6ZBXW#jDD9*>K^16Tjk1%@qV0|+{YJIx75PBYIyf|x zBj8&9NPPp$h{JYqzz3KSS)K3w`&?+#DSCiwRVb{^c)Eq-7^vWJ|M{r|67;0mAt<(^ zz)25w>+qpYpvaI=y46-G;g{aOcaW(&LQJF6ARQFXk89?tvQ%m;`Cc$%(o22{F5%Ns zy-jW_mO!hTQEw;@ivxqthx3W)%d@k}(+OU4WS zcMM4~ANu544h~VfJS2tD@M%yM$#-Bw=6jCB@oOOf16b~il0cGP)KqZB{!h;$qYIZq zXib7reuFCu0@6Mh^<(2bnw$#fwHqo#>6Zf&2V#RIp*I*0HrE~Y?1Uu>;`;{UU(PNH~FPtw4?M)D~UAcMFMSZ62vw66u8wFz7XLDQq1Dn5|?5Z9xic&@}4OyIfo z6d^hq3@Ijon##U#1t=tg9byQ4apU-1zI~hZKg1iGLDnp+NUAjNj*!lBwBAxYcpr?BwtdHr)i7DtT{43J9+@&1FgYL-C3i1N z5&`_OfG22hZz21U$P#V{E4yyfraQxP%)C(i;qyI9jfFf36*dT}2;mNdYL9(g$xbhUVu0u^zLziJY?6Vu2nuDxh7Cw(=x26|>>*|e32ZfV!bvu+zAvrWuqx|R zDIMH;%}P2N8nxa%9mpjQ4IG8qMTP?!WO>%DyNJE5uF4Ieb}0lM%L_h~)TQtRedQf~ zEJlcV)s_4g6?UKdDpVAw=qnd6Is0wbN`3$(OW+O$Z~k_y@F>I|as=LDe23h<#C9kk 
z2+XhVJHE0Z-e3`F-AvcMqiIt%0C;=mfhjU_b2m0hapm4$O${dbc*M;r8%jY(Aq`jN z=47-WKZwnH84PTro8BnkIoQmxXs|bbu#>*0HSiLN9vyA=Ojm%UZ~z6@P545!Iu5rd zv?7M{E{!*;|h=;$Z<98dc!R-HkjvVW1nFP{uYiB29M*TXZulAC^W*o)eHM z-vLGQcNDXu2fQWXV1NJ9tPq5{5R>fC{HLLvcdDfO2kMAe9?FD1rnfBnKmjA^G)2^Vi+RHXzEK_0v^BDwFt`!_+sgEfGa6yW4iSdi9U@H4_N zs3z9j4RsbLNiA?)z@*@(WomLh(s4@uFOJ`VfQJ@%-;wJpJKn@F1Hep#J0cGv1x*+r z**uV7MWT%mpN6VBo)dcyy_X4{m5f0M43I!$$l3@lVuFEDS`FYH*I!ykc>{9>I4bf{ zPckjzMzzP!ZQF9wEQ-hxVp4_odRj8nWci~onh38mf2!j@BhXz|e}o97m>wc8i};6C zu}CF5EKm3%n<~0@+cs63*q6_=%%MLw)+tW#TC=G^kRnU@=qA9WX51&nzvbo?PziHeUiZOrbu6R@#aF_yq zt=IF1%mOSvgg+6HjWS*Jb2A8ozR2w%G8{#H4Ofyy-WlfYBbg^& zX1`X3;Lw@mJ1IxMNOLU&H=<_4ku`bcCS2u?k))Lf*c#v2W7l-JNwx_Z?)eKB9wL^9 z;S3oP7;wg4SkEuC9?sjwLzt|3&Z8G)wt> zwyd5{Qb{__OMW$vA`_bJrx&IiL`Wmv52Pi7jUPr6>v!*0?p?YU{eZXaLY3Ht>Ui^#pUufX;3T8!mtJk-4tjd2>v zOfP+3Nn#w7^OcFM6(boGvv|>z7zSqoap=m19A@zmaBql!Mz3-ma8~=*-NVUP4XNu^ zvJzf~0R5m8k76gBwV6ME@cN2%7zQiH7%#+=zMr z>0(8T5FNaUxA>FG-u3Fe>o!pmAVwSaC5r1jJn*9F$r=)_z26|THDJ8YWI*9Q#!tN# z;HrI)OC_;gdGob^l)OgtG_y0o`;Tqu8)1>ua~@By+tGtfP(;k==FMp=rU{-V9uVEX8z2ItL3Bsepw2QJ4Eyta zCs9hv0Ndbbm10q}(LQ5)kWmYCEMC|Midwh%L*k_T zhtHp-`Th#~)h0?(XUGMSIT4a{K>7rec`RfI5geoJHL_r2YC8jr zqVQ+U8jy%P$vvlyFFznwB&oF=!`+}P<&XiBWf=Ay6*}TLiYZC@z=Rn;8}4VFD!oW; z!dkQdYr^)WMeLgZd(vrO5xs%aBk;!`AqBQwrPGDmd!>3 z(1X8Ft#39x3yShvY0av()Z`D)-u(AnP*?PErPi^VX{Ggj3{*oog&Eb@{)r2Y`?Nz- zQGpT0ANvKLdyMm}d7DBTlI7IZk8yvUF^)y`Da^1HwNo0!)Bg=a67m*d1KdY!QnM*r zp%8_{!3rJ7(@4!S;*}$}9$`>hFVoeAjJinyoq}R*Ju&?0DpWe+kYl-S*kUgUnZ1@F zBR1I(D>wVJFLi88m7HmKbiKxh2!pk!91`O!=K9e_rPaIGn4I*JO# zww!`FLQUA9yq0LBW#jkE5itvk$i_Nvg(kJT{XKA+)f7@+3WrLEfgHFG&qVVJHh zN2YvGVFA{+;6InRJkU53Y{ZVd3vAKSTEIONADQaqMK zfO^s(O>$VkB+OX5$Ro^1*RL^J1dbtr7d17t@a+Z8-E@x+-{aFX{K;7WevRn{mNuGw znHy}-b*SGj0oho<>ZHEL7MJ|cSU`#hL=R_-C6zhQHC){bVC+7h7rXH z2??Be!zcwLbwsi(_^s%#S|lZi6R#&0|59(aI7Eg7MDxH*aiS2S>ja+Pb7tpq3T)Sd zlAOiwk?ay^)F)d3SBmkh9OY(nZPhUD5C|fJK0i&LsmM>*NaM*(%SumrN1cGu92xR- z3`~|xghcFy2}H&pTH=V5lP?hZ>``|^5X<}e_}Qz?3;(wF9Rx4lW{K`2~u&O 
z5(VQX2XnYpbmkV17x~DdxbnB8Z-#}%AZn1h4=B{CL>LTVF96;ZfGJzkKEjl3H9A(X z-&>(Tc>@jK&TmP-5IO%(to7Q{wSy?SKYG0(RMf=nO_`|pMIH%kwWYl)+WQB3kAN*U zT|c^aXzK-dNSwEKUB-m|Tbzf%uRQ_N^?=v|n=vFJ>g6H>5=`1W>^m|eMh3$1U&PIe z4uGOQ+8{8|7>jiw?`5-pF$fApORgMYSiI!q1_D~7xnk(!014V^ofQ3;w)pe2C=rKo z0bP-0VAUJtYmo=MK>&7;xR$1-3djVpuJHN{ZcwJTAZXSXmxF<0AhhmPJasD=G8R>E zaehLJ$PIWCh>+>4Y?*w>8*L~Itk1q=l$97$=kM&MEP z%)?k-Lb}VWiCz1k!$jD;_he+X))p`h^W8t`Df-X#SE0^Ca}{jvnUvWrDft#rFuZbr zO??jSM?2Tvh>Uz;!bS#?I}sL!=NMCP>Q7>=iK-5jD+pjB3A_wfZ~)^gB(N@{tLo=# z@O!9bD~IWKxHUIsCFw#b`vAgWd2x;9U^tM#PNSU-SaCAqf)LX(!q$jaHbwG-&T@CK zr%7%k;(HqE%~EOrxBxHbxC!N{GIR>$@*(JfT&_t0HsV8|0P4OJKskuq8<0sj1gJrp zL!00$BAkONMm{oqeSRGp!KLD2CBi?UH}vmyDM8FA)2Va6dPWtRD3RvyQji1yT@r>e zG2tZt5^~4#*!#~wlS#M)h+6RRs=xpsy)K;4L+FO&tux54zxJA-Be#KPQuezDN=d}r zuWyAnNiG9pZ6sC-wF+o(72@9H5hE^H4g=JBxX#_)n)_gRdHJESf`}y`#<1o;3Wx>n z_dQg(TL6rY9T2><+4mN^fV?&1w9#)ged}37>RmX)wZoChS-yNZnY$|M!TXu|Te5&z z5QDf3m$@n~Vl^XUqg~NxFruLuAoJO0>i6%)yZX+{0DL_++>0wYgs_Se;z;Uq1hq`~ zfHz2NDZ%UiFg|?%8-6WnyRawGNwvN{*B2oJfP+HoU*1BZ6Apo>hl2HPGN+WTBnp-` z1359Px^H+m&GqRt*5Eq|tx;1J)Kkou1&|P@XmlZZIC4%8vlhQ{mLvK62Fi&QR|A19 zZn{fJU~8><|9(7#$IYXQsEG&Bs>c%#4kU&2yB>ui^IB|YDk?_=TH0N_=?c~0Cbo-O zgI26uvU_C|$`G+VZd9>Loe6+^bP78zViJwMqQ;c{+qZAzjUjI5v<%4G7!W1fzmaAV zFO&e<*+&wWD?zggLq9_9gzL4Lx+SQ(4R3%5#96m8ihuc17O{mZAdVzu6JZS^W5Bdg z>vNxwh_3LW%Ctu{+~4*p=bP^9SE8*I-If>*F`=8^ZJ;mv3(pV%)s zFCy=5Y!ctW!R?1eC3kh`Z#sei0ZpCt1@41%Fr;I9aGr1Ak=w=Q8FN6tFMF~JCISiS zy=iXVb7IQ+k?yk$Y-adgRw*XX7YMB|I`n8a@FgVPO>|6b;gP3BBqb}q^*lc976fC;3dQi9@-VcM<%7hS{WRk>?2z*caj`O)+2xR>{b`|L?!dr0vmbf?>yyy<^m>64J z@P?{Le177~A>4*;aub`eLhhvG_}Ewp20ch{4WrG=$ZP11A7?`wJBhcm!JdOEK=fD; zOM}!T$OGhUcx2QOi}f9*1@+m+C}@k_=-cENW#V0r4-|db|Bdn?zWDF^%80Unnsn9P zZ1yWg6V{O^2k8jN%n^pbt>hOBkAYWMcokF$#6JTOQ3Oh?d!oe?C1#|X`J!5@Gv@By z1nB9q=VD;N;+<62;O}&zDw2sPLNTD{Zh-=g?zDRKYBDBEZWH5Zy@UGeX+MHP!NkM@ zh-N^x#`_qUarA|BAJHRVC1LT7p1l7@Sy>rxl)QwqAV{KN0K1)N8u|Wz6%p*`4Lt{q zh#B)^fH|~)VTTZlV39w2TlFU1^5YtprzD8FMqaRkG0>CP(?GXd$c-VPOCnuBlqgn* 
z;1@s>N;izfVC6ovR}$>UmO?U^1$F~@%i9eW;Xoqu;PeyRUXn}Z4v6C0TN@vSkLbo0 z0a$}4v>(lSCz>EkXX}(2beb+$finY7;+=r-NC*#!Ya&Fy@*D{jFPhsW(mtqdjBf19fn~*vs5Af;?_nwu^NCDQK-5t| z>%x?*4g&g7+fQtS2c!)0^L=nol#zj`Oe79Vwh68YRtYx*!qEQyGeCtU7+xYrikQ); z`u+1eG~igF_6yho3FZpwx;Y&TN09$Q7I!To9rSam1L))B^pLXL{>&h8nQE@ z%(A&(AN9Yl>%O1kc#h{dp67U;$8jCM|F zZA3nXjq5t=S09jj@LMV^`XrPxAisOb9i3#8YvLXrD(6v zV3u_v7B({VGGKvXriDq&&-8DD^diqa!m0C(P6GsIrpk{5tCaAFx#kDk+I&$E*m#(p zNy+~MXdd_Xpr^vQ0YhlhpHWBD$A+hwo-g8 zCKdM5B9yV*$O;{4#?UXFNAB2iIN>+E`pCYqnrHh#RMDT`H>*cGqpFCWnajn)hnfLF zzY7o$MNr1{_{U$ zPVG$pe@Z!C*u?(*=l@KYxNvX({^$R#0OtQ|m*&sR!s4%c#}Di!bQ3Duf)&~Sc5YA~ zMT?J)j|by#_V3?cm>tTxugT+R{pOy>poW&a9Q^zKi^Y$Qdn36*Na8k|nMGk8_wwZs za4dqt($5~*D18?Q5Fv7V@<2viYcR2wVnsAMw;8MJG7Kv%q?8>1T|?wRA3iLHXg~v0 zM<#d_m@tov18&*kIX^po{Bzb?EVD$x2tYOo=3=6`#CsfT6TezDQ;5ApqFM%|;Rs7P6JMB4nY*rf`Ct4oR`io?^SnL*9W4QMc+IZhW&h8 zSW`yOIpp)uFIr@Q_R0Ob5_Twri;#rMf7%j^e`T!Iq=5I&mb@L-v<@X?oySoG~Jo9Ve7hK3eV+oS$0Mow+y=soQGH5lTHk6*~Aw>-Y7N*NJZ>nXaI=e1B%j!Q$;}Z z7*IA}PlZR2u%`&jLt4W^Rx0#PmSDDnzHVO)H3ht4?#GYf`#FOOR5JS#br41(vd6{6 z$Ft;Z{LBd&i^y;icT(_5pv5W0^CFpJ@P=?=A%Bg1%x&4m8_(kJp$G?|{XjMXTMd;2 zE`g$;g_S=pxPTdczd^btEZqoAE5(j|wmW`@^E%Q6G5Jvoi5}26gIK(T00bgO@BQ+8 z)7;#&Lt3Mo$EI17vD=X_gN^sWmO4Op*Y2IJ5C08g>3>GSk`wGNN@?-+7d4YzP4M_ejyutS zjKxX-Jzn9L##^-^!Vex1bb3Yfj=bhU>m9=f&VT?flcCBQ8?U5x;1s6{+#zpC2!B`ZrGTY z3e0x5qMCra_NU($)`eB4N{e_?3n#l*;ih8)Gb!}s1tU)4u<=->9ov+tSNIUb7^cXd z4(-mv*7n_a6tIjrv=pQ?7#n;0{!f=aI=(Wy2W*uX!0!G6*25!+uPR zb9jzw=H>$Uri=4(`SZb7*}5ejI6~AQbQHeeF6Y6OH*PAx$1@8+?11VCb! 
z4556-!S~euXU7kCPXEk~f&Yv$hv+pce2$ohK%J5RA?S+e&>B#E`wnAUTaN5@EreUa z$xjHgdtWWAtpj0T6+j+okKw$B$EtRDzB4!Uw+s!js16G{=% zFacY=^ht2<-wkKd7F{#dprwV&-3j(4kVGnm-9=Z7kpF5`SOdZqvQ>VR zSp-nQjn6bMrbTgQoogn8u9J?1$rJnpYNP>vpcDjT5C;k*bF2R70PgPyT_Yynu zVj)ftU=8d+ZSY@W)LEhrcAXeXu06f2%ezjz<{bZyGwM-ma2~-9Gs0Ow`{M@%2}&Qb zp%Jrk#|L)hsp*f1UxnO|7F`I5qepp+w33HUmq3xj1BW+~{+J-~Ctm_ZGH$w_eEGt% z=@lkpb_0?eHniAUV0cItAR4`6Al$P0&j`$(FX(okz$TOBLGfmYo8`OGQ1Bf+oY=2o z1I03Jq~dB5Q&FVrqvv6V$(Eq5-S2icC4DY@NWdB_rCZQZGQi?QU^c8H&`|mtnzQ*Ra|Z?nT)G?2E=dGP4KYL#N>^Y_L7)amheDMLn8G6a!D! z@!{dk!AW{X#@ir{p+@#c$~27bL9iHIoP=%Q2?AS5g9djd{#Fg3y(0oe;!hJj6n?M% z?w(b|mxZDPdj-*6)Otw5Eu#Z5;DL336qiv5%OrG7Ibnf zQ~g^1;m z)A)BlY@Cen>4BKP7LeO@C=v-e-+DB{O?h+JEm`n5qUtix4wL#x6H(_r_UD<&R6CHI z)SvDCrauaTpu{TUYdgD5sJ_5a{AWfEC8AUAoRz53slv|+80itsqE5FR@K*!CX#L%U$V(qV8gKt?!{tG51Dr6 zvb}vmHkhU)dP)>Kla|;Gn2h2SJPe@czve2d+E;Su@V#>e$O;O??#lvw0{hMt&R>j- z3*67rXbnRpBzZR@lx{_@3`=#?G&KZ-j|q6#862IXooLZh04 zNkRvz7Qyb9h))^QE=#HCeEL46Q>&bp)mU1J|oP*u`>Nu)5-B z(VkXcEZU68Xlgpn>R4K)Jn^1&&peh1{4Z;SXz@)}*%7^7rtn9=ltckU-V6@~=Xn^y zwGl}vV@p}#(`su|;Tr%Ie=#OI3fK~qP9l@=0@73DjxSPScVQ4A8LF)IYw)ieA<+?p zd>OZwMC?l;*Dg~HdHJJJS+JdAV-Kd3kqpO9h?QSg0pl?P){mx_vsy3E4(dh~R8|1c zcP89*OLrKOi4>4T#?w-SyUxnu&8S8W1z{sXPa}3<1tKFhfTIpoS@M|+@pvE6Vm6;T zFH~My$^`98hixASPY4hYh1xf+1S>ALblNeLky#@_IX%`G!JtuCNJ8vr0RpF*n|56n zyW&C?9;1gOgp&kql0!u*y#n%E26b6^7akw#NG75I%h`viD2`dBMhU?5%nVxZP8*IG zAwiS~;o!tp9{^lEgl!Uap_zdXOcpt0UK1S@bX84T^{*OtTPCec!VfD_-MrR{UJ zL!xB@1V))&*o3MLz7l~f#SWA~{K)m;r~46YG<^4v4eG4tJj$t?b^W)x6|~CAx+s(L zlFDxB@P=uo+r9mO}yi__-I>25xFHZP~)S;1}|u<0M&# z+w;C6-e8fyY@gR2-@Kqg8k|YSgqzE~^Ab(pvIYQ<#3Sy)uxOQxfC@XkwD$B@ha#>~ zl+hQ*caivPCcj_4^n0jj$S5=O>({Q`*bQ%OC{J6|r0j#x!ts2H3AV}o4?D4Hbo}|<`%bM#;bPc`d(kFoD`m_W z5Di)1u@PKt9z3|xs{??%akd@CNrz(PgrlY=F>Iq^0i7V4@BUl2=LI{*EH@8QpR|Kf zf1a4FKB+hs1{Eb6)!T4~1s%A^5qo&V5oAP541%Fp@(TK~8*5@rQxpON>tDugC6;w( z=Vd)U?|I~1rBJrrbuLt1i&;qmk#LyZ%FXOOPpZX`Gpg$TL;(5VXTEqIKv`Z7CpC`C zA+CPQDL^7L6zcW5tZ>MlmQnQ&4oecNk@P;`Gw)CgPz!?OF%(E``0kW18YzR!0XQ2; 
z-J|eg-89bkVo@HqO+knR#Pa)1KEkw5kp$PQf?ueZ+aSuh0IfZeJpedmB8ZHYM~Wh` z|D=G1PPqe>PF5BK{qQ%^V^ReXCGy7%g9q=P+U}zoydJaFcoT#cf#d00S_7DunWH4h z*)4m%4~rsV@t>c1#9#4pHlYdK4jr81M(%IM`yONz>cvSOy1IV!91*}^%M1jH0dqv! z*>6~<7e&sF-y&#Wui93GC-d-DI#|GZLqp+Aj|dc+-8plp!O6A+CVw*}07?d0Ey@>j z2ca#YP0w{VHw~fi*i_D5+c?S(3u@}bGEU42ERZvm$>SCT;N&jC z$3xHabNH%oV7^sWe@TgP1 zgpUE%ktGlDYR7M%?ta&rX$9DCu$yjt{1OUrRBS?!-#8*8$K*64$2%Ok11L51YIlcm zUj|@B_r@#mPRzMFfYJHriT;j}!Ypsc2V{LGSP0-3@Q5gPmY{{0L+0&1$`=rujWf%4 zlD7cuLkGrCN6lqy%E#bOB%gO~;8XJXqeT7z00*V<1<;k`;Fn9=4vy#|vWS{7V;ljz z=m^dRY1B#Rrf|NU+lYwoSm+~=r^jcfu})JIJPw@Ira6u@%I-Z+yHzD+(Pm=95ib9F z9#&uEaI>Pq3&LH5oVv8G6jmmxcWyTcUhEHaMJ^1Da!<|r(;&ZyMINP-1<(K_oeJF7 zPZu&x#f7ZInyUtwf@DMJ7}Y2iA!nx6OA#hnGRQd)sDu{kP`k(s?_3^Apvh7z{dj-Lv6gVd}V^ z=Y_54?`e}}yqA%V!RsM3P`6_q=h{H3_iFIsyJVM{Bvas)bmiWQK?PC*tLkM!gq`@)a6??^O7Zt zUiG^Q2~qnOOjo^~Sju-wcNBthG9KkZxjIwSEu?=8dW4QeJdxfOr1d;E^ro~_0Hup( zRPlbIu?N-;*CG!daS1uK3JWR9|Mk_OIZ%$=hBcA!f5`tOm_+5)?qJZ@1f0T-REmYQ z?A}(Kd6M2#K_4651DO)QTkD%jIMD^4tnlG?0$s(hq7gLAm}PK&@e6~}Dhz!F3aBV~vJfN+!vA;?`(Fo?Y_G}Zj;lSb zLjpfkGm+orWPUj#A}1VsiLSjq0eFZ@BIsS%+a$YI2(3i#+VUMj@QZBqpN*CMvk8Zv zJwNi^#%si?MWSXp9vIPfOHCV8F7D5X%Q~d@SW=zCP@L&bpu>`|=h6VDcLH8lxn(~F zSSGSZiQSkG(pYGnP~ul^xa4*R7!E;C3EmH17Re%l%65g;mU<0mFUV@{HvL{S#nkNJH!E_a2gwJiJUB*}a}?8BJszXoOHdXr9+N8sxQghzyAyH<+r>Wf2807i zktIkAZYQxD(#*6fGWz1S{CFPzwrLm@>z$t~*Gv@kPsEwNrsMBEo3D7f_SZSw%cX$a zs4h0X<5wqV*9lEYb`Wk)zjrkpZTV7wR(Viyg(D*{-(!3)U!^%JX-eRt-nM6Gx{KlP zg%Z>7<+(?-`59(Knjnfv5sw>7ymx^S5&IIJ?^%T{%<1?%)H5WggBtV5qV?s`9zjXe z$0z=}{iQ#kDC>2E5QQOU%9)Tp{HcW5G9ZIl`1rq%NNDG`%$dS>xMc8cl z-TpPu;^SDAr5Gq&_r#D0bu&l4GnyOPtp>Y9+qkOt`^WR29equgAHFR$Y;YU`px^FA z{(gRo9~Y~u2hx)?lnr&rI=jTJnHO}UI!COthN|HUf(Nh^J^Cf+3MwU@$tXULKTH5X z8~`KW^@Hv-@zsG|5EcN-B#b~g|Nj1>o>2Utc2(@u7iDQ+#=f);` z&UaJg$=oPF!h%@6u}_r)g53`v>*uSYlfySFE<=2vI#XoBqa<*CdIlX>$Nn$~NHyTb zEUkN}X+Iu%*K5aVVV&>Y7+Cm8i9@^_x7P1X&=P+1g#4*v9u-DR&rezk@^zvt$4n#g zajJKI|Ap?U4piTt*Qm&J7OXnj)VCuy2u<4?tn6^apZf%~yz=?l$d-2x2ASv<5w?QF 
zgj3(`Tm@{K&Q3bjg*L?pb#`@$gvY&_)lpAfu=E{BklnIH1EY{eIgakYMVq~vXky?R zJG$Ekk=-!W%|tW#`4R}qt{~Rphw~~6e(YipTv7Fksb8T!CSX!!1ry4nw~g-34=@iW zx~}en-j|*tol=YS7H9Tf;Lf!A7d5mNLD9FV`{JcbB|x^H9?kBSC2H5<*&4B;?uoY> zft(J{$DUZ{JO8x@gQOINOxB@$HbvBzh#SqnxKXV*O#sz7-+f%#ojV|-^2#$er3dw`);}r~iCU+<2Q3G@Jn?f(L zM&32)>^A^u6ch+dkK8toiu{m)6j(YD!9S^rm(JU=zUspRLRbX}EJwBeBmW{Awsn&xw(|DT;WxmYy^Y%; zTm8b?8u@}$LvT72H)kIAPfNm`E{zNwIsq@H_hIFcQMkVET2INt2vIbgYNkWl7W+L> zlEse5ob;G|&Z|!$_w1Z%cQAx{7VyH5)dXT6HdL?$Rc;<0q2rtG?v1~H_T^?7Gc#oI zucpYFRk?v$Nfs&O18!qJK*?1}HahdAU*I1YjM)lVhVu zX+#lEr;(|!ZA(vXLC>8CRO2P^9phiZFR?p0ma|hWHTN*lw*VH_vNbh*y^Q0|0tom$ z6#XYTR13%%IgFQ>W>Cliz0RW&m#C@-;nL$qA!Vf0NfvZkq7f}el2DJupY2ELwa2Tz@3W$^xbr2cD zMf0<4z_>a_uw)RiXGh$_)Oh1ZWDp=Ng_IbGb7X=pEUKB+W2lXW#G=5)M2^^5^vQT( zaV^>B%qDI6+(pGec6)Q6b7P~q{?s}KnZakI$#%$b<1LA(Tp#xbl$ z8<1U12;sD$e-(5QKuiWeCd!xsgp;j%04S|2_!_TSM;!1b;-?Ma<~L z&V}a-^PB>dZ7BAF0J^1+aDc>_r8p^-3>K_aL`4XSOKRIT#z;jGa^x^l_erkD(rg=R z>qTH{Q9bJ<7}__aG82eP^N$@_5rG5KM9wXD$DKL*D+07W$-Vsi$$yb8T1nKHZ=ic6 zA&q3LLUMuhFOF8a%WtQx(FT%qf0C#GQ-C>|E5g714a&pJVI%oMnk@BNxODPI64i`s zjaA;$Keu5P3FA~~v>W7TTPSkMR_&gwM5~PmsVHqX!0LcSJBcKDJqKxVAq%Ry8xI^s z+0#>=)cPnnEm7(c&kP0^6hI>JjlAQzSFUgn6*Q3xK`w*pb{%AnCVvGTOWhPiiigs5tL4Xv>+P`h>u^5 zN?Rm;CYEDl^BIr2U1q@L-(YqOA)W0QSVR)ailvmCT#hZfQjIU=tE_{L5B2$jO5yp3CoT|fnf0r1%YsGTn! 
zlp=>Cl0+5Ko01%6qw?ywY+E_5!$PrJPoIqZDF%rtPM!z@fC+6mKeW_jTwz@za%wPhByv=` zB5#fb!#4y=k_D7KrBvm#%Liz{ztH9u3EtPG7e?M+c4T5@BDAG}CXC>WIZJVX}d!eE{ zxdlqEI5-#Q0zifgo7+Y9@4x6(S_j1+X7i$NtW)!@KL5nci&gCh*&G7`RQHZ{sliK0 zll@n79x}HJ8Xv>@JxQ$p`|px71ONNa|97{hYL%aWGCSrK!F|iqS0y5~#(r0F)^$-UhJ0aed9-TPv!M+pc&nUkTby zM2uTlD3@pnp^BhLB5N{K&iR#Ct>Ava)JVFj?jIc1VSdWmZq{_AFui z`+M`apJQev|GB9Qrty&5{`a5%J5z#WU2T}({BnhU{hs=2rEC7I1~+u?MBPbmJeiY| zLxMC&H;6UIHuCHu1_l`ZxVC#vNI|hz=JMCuup9}C0XJ^kQ2BTkj!S|o($LT#GIFbS z{NH&!)>`l0_}b6j&-BlqjVGSu{byvSnWI6UTLuYH={&*|iN={IapAJNgr=O>@9W05 zrt6Es10;l9zO`x3zxOjM{dX_aCt+P5ON^Y{NK!3HnlGw;V*e$FFF-}zb#gTVii8WN zer&=z)^&f47m4%0&IJJXXhO!{WLkiw#sBmDs?(BIDeJQXR3;gk;ExD02Mo9AY#vPF z5Zz(-V>q5{Pd35f;R`Qcdi=MyDf`dcxGh|kH3~3Al}K2Ejqd8DKdbHG;p1aPULm?i z9d{5eNE(M0KE=5pc7!6Q!eK5AOLjI#<&mUTC`8fiM2-DzH366sp)%=Hp$s6y8A`f1 zxwE<%Q-7^-A#tG_0_#PdZRxMV@E70EipZ!uE}`| zE2Tg`&>C726Nw(~HugqR8WQO#3cC(KZU}*nQu(O%@75=G{#)`=@?ZPXPt^gFEe1P7 zTtL(-aIe_a6CL^;fN&8fA>gN1pFbOI^|>QRQdJ2MO=1P^UdAqr@}nH(Kmh*rO3!~I zMV+6sh5tW`YLy48cG5h34SNVLImrpNKeAzROwZ=;y+lu`-zlY`EDtU#-Arj~@1mvh z_k~|cUC+|@TA^^yB|mFD5xuJihknc*q?Gl}aGI6|z5XQP5!^A|#p4vu+r)J%mZH)Q zWiMShISpkB%l=!juO7KLu_kQiBv*Cin3jmO%$Y6Es||Mko!lXcB0)JyLxO}465*kt z3g@Qm{_U!pk_;B6VqcG;;lr-q{XP4n@7vnR`^>OwDCk-ie<^SjH*QYqe>-@V$?(iswP&=cStUT9@xPUWtkp zV-V)KmDb+d#l)z^nD0JZ)z&X8^m+#;N7U^z8Ht6jV$RJD$q#J2O^pQYHN!9b5o!vi8Bp6u3PBhiE zsW4AwE+vayO1?hNFE*=k^iU*ZPA3*_K{9&_s<6 zS;mLcP&6ni6=^Op@F9OI5^+{W>4iphN8p99XUm0cb7<-eq8>bl-JC*t$uB)K;IHK< z|3p3a8tdLSla&wDNSuvIykXXUh($DO;A+H7tcBO|fU|)Cvay>xWN5GK8rCH)8H%PCYX)$h@Q% zLW4#mxp#gY$?j%JG%FI2 zpQp3FXJh%MVq=BwUM+o9)^*|uL7GnQUaX1uCM);B-t_0O4n4?bb9sH1avl(%?PFA& zeKxX*cj|r`e?|89=VU%s9JM(uM@v=g-B+4hUAw-GZc zDrhKo-dM~kZ>RJ4IUKjhw_`!z?zO|$^D=;XP`>5-KKUP-%wp$g3qKEC+n3=WzS!`v zc8KnAgCF-7NoPt|$j4nUi&)JvcEtBy{lWY&_7xoAa!cx%CZ30+N;~lu@12=IFxnZ} zIX)2~AKW|=@dYX@+uHloDKSJxg&k|`UD($v3Q{G6cYB`>+*fyXWOzxGli&ptiVNtpZF2lGb>Lr!9Dw>3MerI#yBwE2^8enMx%lh77x_gE$3Y3=K=^2CyWuXB3e 
z^P>)nGo^FTQfVA3jDJYqSh6x?X2z2>CrR=bpU3^}X3*%hk;top(}2y{2L@|9Eye|Aq3- zVjm@wK(5dmLP|>f3R7dHN3YHse;+?MQLd%1e8ut=lETsXQ?yrZ-7E}?ymF=@XP541 zRwL%>COnV(pYE?Rmh)l?&%Djw`}yhm7LP#%oX7I{`Z0I(xb4HPM2$u)29uMMuxd-L zac!eHVwy-sR#$`90p;(k-k$!2s`6_h%KQ8U2=PWw!5P z7qfqLypxmv-WRovHNK}0#w!LSIgdsObEIefiGggBHu-&>N+`1uo7bdQ^al$iW&M@& z`9wxQ11#c;8Rg8F^IBO=B%Dq@~L`pxKzfIMz+i-jD z?fZfB?{9EWZS{4P71cEwuIlu$*?ZKXu|1u!gmQCp+n%#C-B@-p3yYKEKF+;d-EI1( z=!eR+EDi6A`t8}^oqnce1*eD@u0P(1?)i=boDX>$oGSw9h4hjK>eMzWF+Spz^x1AU z@b_wSBa^uQ(5c_SHzy;7eRZqefDA4BCskVWk9xG+oE1)Dd}9YbtNI04(^E{Fn9~20 zC?C52GJ1cbD(!wZj#H|`yRJPrE~wI=6LVNo;bG0iDD#@g1T_hf7^J%lFJiyH3DdbLs!~kwYJXzm`e}Qx_yfsBW>fITnH!=I`v)-=lY(WAVrc8W7WG z0_+?Y@|&Ae$}Z%~AL&VpvG59aIA!TM+%sX5%9nMPp-=6LPK^2d=R;T6Y$A8tx+})0 zoWJtNxjxq73|=nO;zD{FBQmOIw_Lnj&cbq7@P)>%)YYFNF5YGo5*9DZ;pH4!%pBXf zQ()}CJ1$NUabdoeTYeu`*U_8TX>HP9ThCi)B3P{V?&W*U_${w$9sKzOW>+UC@_&2W z_;Q=S^y`500S1g2k>-WtFMLGh{C#-`-`=glKr-&CQ?X;n;7MgP6;zQNU<}diOgxXv zxnK2aa+(aFRW0qTUrl$FTL=8W2c%42<7{>eq?ZsmUYlBC(UiVftNVU~)WfkN_Ed4< zuAc7XqdiKy4jjF~Ib)vjFMs7+b2b927U z{_|<+J6@O9mXWJwre979`7AY-U1=M9u~y(!ApJ<9&Mb9h@ysakKpkpV^&50C(WD0O z#$%?sZK||8a0$hGSeQ9)aFhs@gyP%1bJ|$k8c3hy!tvJU`^|k*J5@bi{519~djIbu zBUi29ocjK3#jRp&3%&ba?x1pcyDTGFj|9J6eoKYI|0lPXH8zW-kN#p6Z`9s5k&`afq1$1oFDrFp&TLLku{s=-w3!+dIH2WK;k7e|}I*P8d~T#N#-vry5e;IlSGA zz3V905sCuqF*FrZQld&SHz$hZk`mE5(<;THho)XQY`kvn z$V?NKT#ag`++3!J5M`78725mumZg{Q#a+5goeY@xv#pbjds}Z;Q|BI8^RN2%*aUkY zhv(H_Z+iDzWOwkF8ZYlz>>oMjhv-w+-VxX;y{0ZB?%4EB34dw-9h(zZ@A$CA-*<4w z<}Ln|^9Gf(&S}mJG_7ZjpJAYpJ=0q~csn&*XVy1dRdsIW8%nmzIlU>NqMpqjg9X8M z83ijS2$0AjF5$9giG6ntkL-!|PR^*Y!`mHP{+jF+LDcP8fOY$i= z@+>*Bxopp%qTf{J0q>&PwBCdlYi?M7Pq8-T=2Ub&e`3Fl`>`L_ICNJ!zc!?ly;T%t z8XBN{4A2-9D}Cu8sr+E;qv1FCpSKUr{5pikBl|Vc=tst*O=}HzF>myU;#j%jy@{#$`@3x?mZD!*Q{{@CJQ!TfaV>hwQ$X&bgUC}dqpmzuQ@(l;(Ji`u}+Ka*Z%%X4_-@f4wSqLN1Z_+_M7o? 
z<`}!^Ld<^6!PJ*IclCMir^!=m%Y`j?X(-cM3M?5l34zh zWCp%IjPsc*H>x8Xl3r==U_RDv{h)I5 z+oS~93WXRI1zYx;nupR)WFs*v&g;)-hIY;nABM{K0EMtWK+Ar1?a9@_1mDY_l&5Gx?qe z%B|_E+w6inj~i_L=#%%N+3Ue|Pv87}dDdKe_4)|4Dj8Lyk0r|0pf_QyT?NXe9K+VJ zNj&?jqOh1eUqXp0U-Q*~w;x=-nkF`@e9_@y+4m-LV~k3HZKUVhq!!tb-N{)WSc3rj zuQ4(*n!uYJ`BexMT~Jbz?O3^ir-stVQKBt?>#x~!w1#jl_IZ8l?y->vRdEQrsG*1i zIwNjQ6Om7Ww;y9*Z-> zSf+Wotb32ra-}yHt?74FKH!=8o#8_cc6xHQ+Q9R<_w7fr9bZKRQZ<)!eebfY+LE2g zv@TIc0nfaWr(w11@i(kdp}d=qFa&&KJnX$Y@Au=y ziQ^T~MHXX`c0+yl=aa4b4SIcad|;}xz&MGKJ#U|@aiK(&?TwkUf~or-CGAs<`xhLvwSJB9OM{-965kA6a`bwJhQ z*mWm17B4}mL6e^5_~9&Xu}oJU$HeT1!3nFf@-nj{xoN$ZnEf+ee(4`oeO)rt@Y2EZ z$cV(liBnuQ1q!P|4~v)IHG7floGoFv*+;N&>Wo@?Pkxa(hp9)qb!q9=*FSzIT70yV z9sYDXqP=Fv(@g8ueKAw|lO=+`%@_aq`8o8ml6UZVgZ$to?zd&nZY21l@r>`ZFTV0n zILBi|{bB1LahqD7W&Dq4C0jhtuR*gU-S_e3liB{7a0y|AzGSEW`Ygb99=@_#F^Bb3*Q;o?A>embuhX|1tfL7|;^_<$$& zjZ1^0>mZPY-j>9C5mq&S`q#^Ug1vyz+MR6R<6lG5sH@O}<1F4d*C_;pXCh%IL?S~n zpRn-?fl92CAKjh11BOv@+G8f{o}Odxv|I-Z#)p19$SCO?UhdOX3NQT=r0a{B!n@@zk-wbIOr#{EXuuktkKlb+evp|TsFa%b0LW<0iuw-F9Iub?m!y!(e+q+?VPm zCVfSWAm7W7ffRz^&NlGql8tO8UcVc8piABxxBUXt4<55?;@G(kjWJw@V8g)G2lP~6 zHp;F*Egj4UtF0U7zx^6K^gburoWZuvILgUIjL++=rvFs0N7-7#(RQCJHxn+5eb_uO zZ!P)en->d`)gE}x{dx5D$)BrrSI)Evc+bx+mtVuSmrpV;zq3%mBb5I}!a1g*nS7St zt3H%3imw};yuS!2Le8mruJ46&H9;8^rS^D@>Ep2t71iaw-m}BQjQr&A0An#>L*#mY zIe(vH<)!>)6WrZ^?$gUc2l<`V$_C}qk7S3}$)L8SW$c`G`zjwlsG^A7?Z}G_3 zH~Wg1A&9^X0+H=!0r+kAp4q-|2uWE4jNz}LF@1*safGK<^>nfzrv`%OYOh&%ZJ~o% zx#hlz5=8+)ql4zmcxL-iSNS#6hUdx$8c>Ug&bY(&BF4<2WPY`n0>du^g4;rW&ee zCW)KeUa;{v=QnyP(rX}XUo}(9Vv4G$u2us#4af`miltZwT7tV zHYGKQhiXV2D*d_MLQI}lZLbAGsFZ)#a+@bivfBCzdb)cYh)Mv+bjP>ztz~fZhZY&! z#I{NhI2bd>rg-e6X*NvtE~1DF+N|ivwi6AhCEiFnSwdk4Hf$e#m~rp!-KovxH;!f9 z=C7!#!lsFgypn0uZ>#G23D50jeC^Z7g#YyG<*U#a%^ ze*R4>@*@uIQdI8$bNXF+_2+Afw!eIA+HaRtziWROZa7gq{mI*Lo8Dfl4#!&QQ!`En z-}&p#_?gC-4VB-J_x1`8Dt~Q{Q%lEE+mw(!FSWi`IC@NP!s&Bo;?)=ovvf68JCaAoW_D@|YYq(NY zdpXat#pOTpMyfWEv6KJ%;}Mb-PYz^*=+s~;t|!4`DQ;6K&P2M5$o+OGFYt5&u@T_! 
zsc6Lu{Ao~hFpSCja5J22l9=);vSQqC(C7VXeACM7&j2msO2Svuxhs+u-few*(bu1` z#Ssqm+}xaLx}rBY!q;p+)!%=ei-Y%Kb+B1r;tP8%1I={|X4_J#dOY*aY@ynoPdcQ` zf2U*rCXzSN5K=jPRafB!!1VS>jYY~}5M!TRdhkbULuu(7iqc!kCzf#F3~#%>#ZoHC z=%K@4$;lP`N7^&#HWXeePB6P9A#DUJ>E`2~>IaX_K_oT&d0uf5HTLT(H~8?fUCd=q zM@DA6zg0b9TBrO*hv(qFXeodCiVA^OWdbrcqFHV3*?gIDe=BgsXSXQd17Ucvf%2C^ zzlNih+C?+MDGGZO9YwZ|!zNCP5YVjG%r@{fJEOT1?haGW#MPmTGc$c=6cgi2+Gn9H zp?q_;x6YZqDKF>dPu%v=3)__DYijz!Jd&&_EICt?vkO65_v8BJzuAwI5q6P)>O9~(HqM#<(~ZGt{{QLVx|`kvmAXV z?(EX*7iAbMe4f;s$$hlD;!B z7-@r{0|217LhSHS=%*l&-Rhr%3v1eu4~JxnYI;7 zhdjzKyE5u->YKrKs{C-JlF(y~#>D=pPj|T}^Ok;RT`#Zi4P1sZe_m45zT|r!QeLG7 zucM>Z+#K09FyZ8p-V={hO*7A_+`96wPaPawwm%SFx{m+Vhe|eK$@ST;yzNE&86{fP zhg0@KFChE)$9ZQ;*<{p}b>&u`MT-{2xKg=^ z=|Yg%sO!ZKyIhWq7PCfG?v~)AYlQyF%Gz2`R(2WGB8L>`-pba|Q~7_s*xf0do0}f& ze3adCe`p7r!WcW%20{L7qCEU|9u{&UeN*Hg#L@L(ux72ou$|~pnm77!pM_LpV8su> zj7RDt#R5|lVkE6|`SRuF5Xw(HQpcVHk2n{|as~?Ksi?Yg=H*9?+P~Okl)-*`5$r)5 z^oL{plVs=VT6J*#rNSq`TS+(ABUU$s-A?L*$Br_`5@O1K5T5%Pqd@o<)Gn{&8oZeg z4xuOa!t!tK2}Nkcy1sV{$qS`4hgE@|>1az3i4%QWoc@U^ zNwX~R%FQchn1JK@K1*F*y6#Qq-!g?)!oI$;r)b~0l}}EC!D4qgtaa4zN5-A_0Jq84 zlU#G3pI^=POp7h%i_NfFddFI7(~g1(^^^~+v~QE?4VJF8krEZWyFFA-mMP|nnwdBM z+Oq=#j9?`7eq@{#=Ob{?=}|RRRrZxDFCk7zl>b&jLO1|3R9?vw$Ji;7kwy-9UA%ZP z(EyUxnV=*jej24XKXG9~3Y&?>O)q1-sc4RiB&U+c@=k-B^m@b&O3rPEx*Sr}J0F~` z#~*1Rg(p_ic1T~4JWods%7Frs94*X*dUoevzCSryer9|vW(Rua*U)#6ymJy}1@U<~ zs)k|g7Oy*p$-(HZQ+@ZU6}E|__Kpt&E=2J#It~*YNg*kQ4xAOw0dt4M$P?}|Us0+4 zWJ41`_`Ak8TpF%7Rch}lE`3LL<(#>a<&kHl;7O8oPBl6-=tUHCr)HkVQrP}Y1}r{KEn3Iwn>t-rN{Xid3xeoS9dxj_`bM!TGWK+$%@5d(AcJ( zf5S_<2)Xk={Z!N#+N+8WyA?#={yign&s#iC)h}@ubD@L?#l>BGyLM$;f2)viZXZ3X zz^KUE9<6LU=M)h`+1=Y|4z9MZN-ky9TwCfWtxaWj?LWz|9t1f!F)>02?dVeVGu{1z z(Z#A<726wP!zRAA&IJJp5e83+tjMjTkw@W@f(9If2&KXAHB_}JqYareub~mBM{GOE zaohIIWmyp0{OA&fL*gP7%>0Z)NG~Lr&D#6+Nt%jG6wOa75sb2MPJH7zD1!6hBQ?yj zV)mY!w1wqEneNZad;wM_GHktseYyD3x+Pg8o2x!*ovS!$RkFEA`RikY#vnwjLM|U44Y~YdA z_S~hk+D`(+Tts zXJI~flE+}mjOk;F?{DcP);Bl?lH4_4*7{seL61%&&N?9+$vY<|LVXWy`!LJ+b7sY} 
z&g;vhe4Cdghh}>&R*qP-VChf7upOsc18RP~q~-t7N&{SN<+td+c7J)f&zu^J>sgBdBtP5#y9LOuF|)sWLI zcZbT&;=18G%6+b!Z%XTOZ5S?wyiM(_Q+bIGeoCmF^#J7e9J$fj5CyWO+j)xcLwb9o zG49r^9!a(g`oMZdTAShCC|-84p4oUI#D)7I;dUa!U4Rt+xX5V}8B!sZH^;}yFvsVz zl$$eTMgaPNudG1etQ1%8U0{6FnYsO~-mN8k(Od9ER`r79Wer}2XrC`O0nbF+r`~f_ zY8~0$afL~vc$ycLZcjKrktb}Bb~a&yM8wyEw%VbRawh26RcR~5cyfKu7Ow9od0C2= zA-KMQh3i}D0;p_i;rg^4^2mxZC3{JUaaX4*tuI_Q79>P*visIM!VW>7PRs=IAAkEA|j>zKa+#koN3#cY~J8RQ~?{w_ljTmUVT=qH}_$Wh!NcAZ^9a- za*w>oF;b436mSfC-S;ZM7g) z6_kF&lr29}A&etscgo1fh&Av#i2R?OV91+W+{ny(rh$z@QU?$N&=PQf8)v zZ3WGF06|p&LgE@#hP`mR%8KTGOCpDm5lK31UovsGm??59LPo?!K!T)4qW%^vwF5T? zlwMy_)_r#rLS}$;ObOzTa2?F0EK}#+hRc$AM?c>}%}%r9EyPFf72N$)HaO3kJxwn7t}VOy(zQ=E1 z6LK!-{Px#_JuxnvcfIuYP~YbV3ZFP~#g=~Yc)s?Yjmi7l@sDI(I)nHg=Oy2HAFQmu zaa&h?Vq)+*s24U@)o@NjQ}AwmX85D0e#zMCd}J3%**9;8oAc=hf>_&qWR)zMk)h0C zjzTYbwqrq}96ec#BT5_*zuF1xs~kd>j4Ad?W$0fF;|z7RKVN0_ZeTrh13mh}$65XtlNM63 zoRH)SyD@36oJk2FWcEp{QF9A)&>`3MKyHd6%_iu9>TgOgiRBNKupl^S-P~}5Ph0+I z?dvT~CM4)RO;KMaAR0*ek2?V~`SM-eT|$yK4(tA7vnqLHmdK{{u%2PF*ZIbgrK@)aiXX|gfDh|a*4b6~d*85g9B{-H@ya#}l)lbh z>G2z1vGe|e$H>6uK_l_9Bf^35h5u$1LKNR%GbP7-0OGSmp%)`#y%_p;@EZSvw)YOi zvVZ%>FXU3lOuCX0Qp&zs2nmHG86{asR+5lphsw$>dyi<*w6YVjM^RZBnc1@W9`Ez2 z`|k6(`#zuN`Tm~YA9uG==XH+v`96;0^?DtzLj?K+YX;83fbNGABcR(;Ff!r>+oAE< zTre3#ifTyl8uPAQ!U$_6<$%$^m+5Rek3MIfiii(PHjms@DSX&Wd)L-?N0X066!?*U zh-5s=GjXA+zW$hrg~ZhK6ys)&+k-h{WE++gc&t`*bIVyqwH*q6Oa8tnjd{*%AEBVjDAcJlQujkHa=*%6PIh zTQeO*Jhwrs6U?+yM3p)oC z6m(};TYWR9^!UQam4SrRr1(4PkYNzjL+3s%=9P2Z(BvNl@=069vS!T<>zZ12p;th&S^<1G7swHU*DWum8ZZ z`9}rQ5r-{P)2rI5m2<04eGi+;m+w~qL;@b;te9(0wQhnKbwlcv%r!#~o7EcX6G8Vy-%Q)sB8%{{!ls+m#_D*xo;2 zXzyro%OHHpw&BDXT6NpBCJ|_e%<1vngPe@o;S`~%HL1S-GGwkWR=^$1Do_@f1Fx8i zV_f!Krc1rNyLZ1Iu?1NCYM)}`hYgURusc!7Tj?kA*lJB`u6(1)1kCfQ!&*FNV#6*` z{PwTsv!(b?G&cG4?}bUt~H0I-Km6?$K+Zr@9a=7b>=XU7VbT*m{vW zO!J{~&G7Ag^uvhYvW5nWWhVJPBiLC5K9;&)WAXG{vj5_m3MjB52fX}8dH7N+-=b#?D9hU?w#>+9<&CwF@Vt$sM` z!65zkJ$XOc0&KED&gGkPzZQ+;ga1u-{y4eEIYv19M8(``ho$j)w#Ek+uD{G=z9Y?^ 
z-&-VeWoEGre)V}#Dm#<;%<#uVt3!2{K613`JF|Q%n8^#NUG#Y4ysdL-An9=P$I0~@ z=dQ4t&8K=c?0rtw=`zBeNtAZHPRNV-LXf*n$G`7F8LL5(xv{5uw%4NQ!AIqhInV1; z=fAkn8#N#3x)Hn5&pKj#)Zx4Gm6L}S9VUBW+NIxp1}y#bgNJ6hwzLh-`rBouMuF`C z@x8~jLVle6r1jO*Jk;uMK7|GF7FpdRd)^dXW&OA#_T}u4)pvS>9l27U6+Xz1*L|O? zqR*d{`#et8G1?;Z&?MvB(xA@Z$LAZ4>UC1t8aOq+?mFjLuuEjr_jp5!)j|EW`WO19 zmfCZaQ7s#fWd~eVmTFyetUBkH?l{=8n}cF$uC$9Ihc&uz*7a4raFMnjsyy=cJv~Dc zTQ5ZgzBNu-_vKs)jLd6qhFtIz9@ zoCy8rd7HjOJhon8xMb6t$te}uVO##E?0d-pb1A4ZZ+f}mUAxHZ(l6ozaT>$VcHd_z zQyA@LUy+|>hu0&v6Hn?r-n1LUFRsQ z-Wc%~>g0293hUq8o}BsGCGGu1ujTebUbk6RZp^-xYksYFZh7GD8Q_IRX4xjQmpI;j zKG|y6kV^5)q>S}ouRrm<)L4!E_leJ=XQer#Uxis)Uss=0_vqm4Vqj-wo{`Bn7RrwE zdX_$aJ>QpLEQFXVQ{b_OURM<1(CjKzU{M z<9Jc|MmgcKGb7R!$BuWEHJrLY?x?55*&O7QH~5}0G4VWEy_u7V>v1&C@pPlEIxA%j>Iqxh$ zECUrLDXE(Vc%UgDp^s+^agld;?VZlUsJHP@6&g%H?&NT59S(3;WL5MLOxMw1F32Ul zy(d;_^h`|n^j#M&Zj*5Z0sKjL2_(mID7u`dH-aZW_+xk*a^_ubw@F=Bo!gObG~X7C zbDxcW`f6oXOBgCtujKQVhW+EW3LFjlKd&W~on@b_uFs$7OI~~ZeT$YZ|IKmLWZ#?> zUjJL;3_Bi(cq&B5+E5d zs5nk;$94Os^S!PIdDisg;^VVOF6H`ZmKf)}Y~U(oP_9L*Tvu6NtzczEH#Kc@Iac;t zup`qu1Lr%t#Ls_B<8c@+yX+ls#V06;LB1mUtp=x6+2a$(EDOZt-tA z>4+-9aDB&&`GbLix1N=!B9^ z)YSypEhi8e_ETS`qQBf{`fO)Lg*zB>Bc-KPii2L-`HJ`$+jS}-nQYcP`E1$NG}E4` zxE0ZXR|nJp-yNPDFPx=|I=a7@7$~)&yI$cYPvqHyFzunSzr9ZTpqp&Qz-ape zI~FyD=0M?G*Hxa^`0|6D4EO15WNP8qq{GD55QUsM;RD@M)AdINHs5X7v7NeA_cP54 z(*j>$?bWN-LsJB`oaf;zyE|9? 
zB0PqXNnNx5*6q(rX;hbvI%w}2b2FsI83L#~-$9?QtGu2j(ES0_RX@HT{g$W+`HZ;A zL0P6j|HIINkijSJd9%h4U$r5vPp+KSM9lzj5=coJd)A7;!Wi|zlzsug3WtNKkncR{ zrM;mB#sEkdIt}zZ9!wI@tUzP{0OoB&E!i4^RyxWDZ!8D8#WmhN?jm(yC)M1^92z#6 zHKxWB(FtAm!jo+0p0>?4*E^W?tVGz2=%sSFof=Pa`^eV%MEKRNtZQFh_te|B=UBO9 z3|ML}?Z73N#_PLsUe9`3*zmy3V6SBx12T-uHgMDU-nz%<_=Xf;DDl(cpvK-16fEoR zCu=OkPpDC?eY_~^YKZrF)2n_YICQ7)Q}<819?lfigSt*+;q}$F9IK5?Zqd^m&Nj7gg&RjclH(j_#(?R& zSFm_QzErjW4@OX&v7%Dfoq??E2QddbLb@U4D?#PDRl0 zR&7zQdI?wo@Ff;}{n$-E2{#?hiFP;?xBCimS91qHm9H5O;!2?{1V)*902tj1*01O; z!_26FoP$nf=~3tIGZilPCU12#_?+3&Do0Y~^i&r_PySfD-;;YI)9A;YaieD8FAcfp zSOubr7z|u;6Xz#42r@SwN)#_#GhwD%ewpoXs{28Q3Mk#YN^G=>>xF5 zwJj}y9tU6OHEQt`>Sv1{95)0wg)%toCrH$BLK*);gLt==ia2N$B6jdX-zFZ!0cGTb z;hYyft>JJ5O9rQhMxiCRIwD!)q=IoT{$K@=V0sA4}xy^;1SBe($ z<}Ym3c_q$VdycnzX}pmC+C{QUvWYHtROc;L!c$Ew=WWv~NrlGb+GN=zMYa9tP-E7#=y*c*0U2q<-)ytG|OgfvSh#IkkR@kb@gX%_0l+naOY*mm7^_^y|ghM3)_kU*0TLa z6F^)IOk9#%Bd4U}%l6XZ#cKvVpp)shGJRz}K0RW~f73&uNoh+nVuHtb`8|n8RYfH@ zYZ4@tstr%ueJGj#l|^m~1M z-+j;Sz~km^Ru+3g=W*7jGCM1)s!Ha%R&Vr|Y&3AaN&IrHSgY(Pm*76ir5|Ht6USt- zzE&r>cDDl%;WzxOHCuIc}Sz{mb_0k8ZhirY*F4u1k*8d`#tdV8oA{Is|oI zDZKdRBjBM|`4(x?(RN~J0a|)Pyb=pD;FWk{I;#+G&>2<19qZ(PCNneGNyk32ui~QV z9B6=RDk?X$_NHF%lGoNQ-#L5Tsw3b)oD*eW=aR_k_-m)h)}r>CH(vNkk6i7FD<}`# zQ(1kif+&epUZ4Eap7c(Bne0XSq?y5H8mZNJ?b{pObXm?11jJkOj;t3Nyt*CgcozxR zljj>#*=eR|JYM^&1%=ZuPpbMgWMx4QAGBBD4u__l5czidS-+LsG1|FHbm2^uR!yp< ziU$#??Dvw^WXA)xcp#PacoEn1ZqQl8(|C8bz>b%G4}I)U?Q2X_x~%)TfVnevN#baS zHM73!h}8H>ap1YhMYp)}qBNDaYaO;U9C=H^4p$oUaqeZ2$3R3nGpJ>kN->g5fNlN4 zK*0CG#(a9m)yZeHA6b~~$JzqGgv`1`LF)J{g(`F{xl<`f$t#>OHfwsx|IWYQS^MQd zZ^nL(NAL&)4v{wx2%0jE9!ZPiI<=1ib{1-B4fGE3@&WyA$0@|CmI|oP`rGGQ1K(zi z9P4?1>2`uv{(hW$xov03^X#rYb8R3{QQ>f7y*-z+t$A(oia^9Z`sz+sdcSYNZ-GW8 z$3b>ZmN3Y>3W}LQ=doOz`{gfg^1xGq#7_!DPLBHRY#l;_0hN`NJ%Q$J^W~hJtqm2~ zftNtabgXko7YA5LIy8ffOz$FF6Yer|;rw(K-Xy^;=Qk%I-2O4Ruu^;2+ zCl{px0o!GTEDM87J7kUU$xKG_eQ+^U8HAXZ4pEb&*Fakt(iUynA}fKX+x9a(`MlZ1 z@~hLd>4i`E)74V@VU$Pcl>)}HMLqYSz}1a3CiQQdZcRTM?Z}=4(0~F5c+A;hajEwL 
z?69-bfhxFQQPm%U< zxWki1r3x;Nr*E`>^atl`++l|f9sNxIiKUD#GTf_?mdTOvyZb;gz$#<4#E5Gcdi=td z6;HjIbEGt$hW>}O1^>;yAV(&AkZqFJ&I`t&Q6)-#4}7Yqh`XZuX!ra2ZN@sYcDV3= z%?R%-)(|eZInWgmsAX`uoFecL8O~v*L`hL9LU;I?t$_;}r;rQ1sr@0EOyNZw+jWlO zA95AX_KfT;T(+WdpDNn3^R?xzocV9_tNB;JM}A2CI2BmE$Y+nS!rp2?j@$2?`*aXy z6O+x`U-Z&>bqPH_iT+uq<(0XlCCJ5cI4h-AoKlIf7+u?6Z^She%28$7E+-oC-&Dw2 zig56dpCt(oH$=#;m-UGy7RX9gC^Cm?Y1WuCru06Txb1s+`=u@t6 z^4!Ihh8!i$$pHnv@ss3gGmO+9#XM#iAwJFsEXGt`DJ*+;ZKUz3oRho)tIIc|K@ z`yswA`Exx`@tn%mmx}rF$~Lt^+ve+bzO+H!{t^nRH`1LFuiFlGg-a;c&AU$19}Ky7 z3qn28&6d+pB)aK7pPCtea9NlJURy2#Z@kwtMe$RmP*QkZ1rpMv*ry8kn?gJ(Tx1_^$Z@+6i;k$IRWz%?tBZL5j zmE!C!!?JLL1ihsIZ(pIsxpRS?U?>~q;o@Z9JF?MW^DKp4-omoz##$pk>MwmMEBXUF z$GgtVwB{4oQiFRY3Ye%-ZUFE-Nm)5Jag&9S!O}jw4 ze!wKi?j3Cuu}o7}kt;q!M_KLSR5CpUPFg!BrA3R$Rmz?g$H3hBxZM`SLm=dniq)_n zY~Oq6@jTBKX`MnQ$?P?+I2R{Ynfko(+B2qK6+bfSHuRRN$J3uZU8Qg{%iDS;A12my zzxWXEP@s;wR4H*e39Sm!E^9kX`&jyVW!4txaouU>)}{tn2&H9>yoF|jQVs1bVYkRW zAOh(DsTB$lV||T(v0=v65{MY9{duI2n zwRL2n;+Tl@Oo`ZN`!jZ_r5VBNUC~5?l3<>*AXbW-*tB^b8nHGBO5%!5*Yv)>;?!Q5 zyn}P6aavwD6})gG_h3{UHE#WejIY<;0n;^6e`kyehoq|Mr^-5qgCMWv(K}fh;g|$P zo7+$nN^JY+BupEuY7%ULH*^iwOuIJfycthhNDd9O4$})~e2e0cj;OoifU=uw-}}@( zS@YkolWo%5p4bO;#>}XOCiNTQk{}?aGlNS5!mFf>Tk*}xw?nvlZn|=xE_y*=6`haz zYPOlx50oCa9;B_ZLH(K@6_=E?GLWlV61={hQK)dh(FmqtWP0&V_7dJZSLND_2AVU*U?#=d(hVH2&?9L zcJ27vcNJe@EFomGB?<*=x)R94E>F*z1N;{^cGvIW3)5;8I|8xR;^SkV)~-m;#f`%k z-f1*nbzQQZ3~%hU9%+kim>wNNbrcw?y*>WIKp`~3H&g7HQ>)eTtnl^jxM-^igQFeZ z`QvYB!0xm31T&vu#cR&SwdZ8o3tfR~AZeqHH+XTnHbICsT}PQ~ddLR#Uz+c{0FMo& z%H|m9o`*_9AM~XPvc^PxM4);HLzZOJXbuw;!9T8dHg=THx>~(dN@G>Og#qb5f}Ix*U0)ax^eAU)k#UWh`!R_La_Si@R+(^B`Byckvy6gl@pK zqJ$hyv#zw%`s(!`rw!aLkqsX(b03Jm!PHSN!F(XFfG#^ApF=7y=GL9DrAwz|L@v=q z?Z3-Edx{}3QGxwkTt`uN+!w1-sq79fNvlG}3r{&bZ0l9t230SfUwZvy&~2!He!?w2 zQMT-;>^Y40K0NnR)|Rn5&y=Cth@r~l2%by`uf*6Rx{z(eirm-#R)%>4VM2z3y3BN` zf5!RMh-MUz?VjdB)yASWSLw*%S;mC40cXSs&bWP~TPBMLTeU9g+qDunSRPRrZ7N=V z^%EBDd0vuQxEUq_K+i(b#b_{_DPX&8;Iy$WKS9#s!!847=Y?_!+O5`EowIUgu 
z^Zkke=$QyXkE7w4kLH!()xi%BgvW9Q^kY61%0VA%*2MZzsfgQ` z@bm(+63~T}e*LHm?@LRcryY40<|{8}YerQu5Hkr^kL^G6(XK;d*xv$LE zwe@LTse2}Q?AH;nFSyF+d?Ik_%5ad^PQy3EeqI$YX!snnYr{ucX4PU%DqC$);GaNN zL;fx65KvF>6)RHx1aFj=4k`5S-owKAB*6$DSv|zet6Cpq^T~pnhB#P5i~ek<4e5mu z8Q@Wfhd2L>1vGaeQUiO_-Kf`uHz$1 zUeb~?pp2PjgS>Pz=4=7-olR-6D7_=dy``0q^z*Lwp5&7ER zFgP#l@YsbOr7ef+P~SQuahDr@=q@2lM$QxxtEu1(e&oANt@&@Nm=2TD+#zTZH3@PO z^#u7i0mS>az@G2|m}MX#NyZX3`T!X6jcohhg2J9%;z-j?&>w9E7_Cnz4e)kSzBvcN zfLPhFV%j{@+fR{52?rJy0;&Q#Qbv_-SM)x+hR+Qg`lp7Vh847yAdO6fxRA5DIMN8Z zO5N)nn9Q7wS|IN2#t&&7IcPX_w> zQS&Dte!;oJ{1yin6&SGxdQ9k`Xno3V(PC{v$-_G8)L<1d z2^}`C-S)qVMRcat&N~n%CS>lUT|_FE^?UE|88b`>-{CVsuSwyuw6Ytt-mxKN5jc>t zxWR~a^0{_fdZd#`0CA{vLBQ->!DFzshG;K-en-`$|2UI9t^6MtSoDI z;pNOC3fSF+k)cR%$V>X2wo}&JZ?-e^7|2*#sR&BapOvIvJ-JG*m+AFcOZuCG{pL=m zvGMtRd)Q}_jYNhED^6ve3VTw)_?RF#e5Spbz54xIzd$K(9>S1WJ1>52xz~KJ_w%`; z0nOS$r_(g&3^WeZB!2q2XBW=5c{;c?9yw=l=73dw;t*lo`vEl_e4ud(@{r4(edU=5 zX3`**-k8JL&BElpsp$qGQ9@H6K^ea_3j;#!oEGCX$=bDDInD6ra&WBYdLany4)7LA zM;*8l0LHd>P>B^o0Puj3o4wFBq5$pj!J(nOGv`B$(u8<4cgzG*-h`XdxAyFyTF{j% zRPc6#?+mYs!d7{O-Z-~Ici){bvgYMs7nc6P6>#z5R&h_`3;0@KN<=-s)mT%2$%k#> z8=mss$!_>mJPujMo|j!MZ)Vfqo@m1-z_ZTr);d4mJv(ARlV6qp!)V~7~ zD~q#Zdzm|ZPP027OTP{mv!RcA!)A@n?eDim2?uS93X$cSK!3W6hZT2iiwcu+M&M72AT0%2k}(Z6WD=ZG-ao zft3qeEpBVMyt)bTIw?@IvR2kX-nj9Y;JRK5AAOTeeFHo@%!Ns@!lUbq;$Wy*{XLOu z`>YF>tF7tBt#7w3@#@CqxoFlZkJtZ6HT>rCv{sEFY~_MT%cU77N7~MJzfVM+7Y~vu z&OX`m=A<&7*raZaoC69?Fb5nKrgf+l6VeIc7{Ki9=FQT%;l8!WmfvHdUY%ccm0I1S z{)4?czqxWJ?1mdQ!4uCj!q-O0PKkJ$)N)Ou_Dg8YDq}gLd&&wnT}VACaoW%Y&=7cQ z#hpXN;OK-FG;K9{9G2&uva$8uOyzO)mAo)fbxO-C9@r*$rBFX+>VcDq z$7Tc@ii8)$a}~R685VNkJ*1Zlz5i_eh9~Dox|~>u&IPw_-{yXLjmGQN9f;kmRIZ-_ zV`5?~imYT^$y8VEw|$B{rpC}C8yz=*T8MK%8ggsr1;qn3Ttvk>2syIUg<)YyN06;8 zOXn4r6&U%W9B?eq_3&!#m$|rb+pR-N`N8qw=VqQ!)SgQ`Y8LUt%}uLj;t_?S9V|+1Ug9U)mK3M_wvT zz52*AJYA^vZK%kHh05C@c7>MvY|W$%Y|V`DWKK57CgYnLxq*$D1^(vnfZ7}_xk+CT z=cn%IV;ncu1xhdO0$|7!^!G{aD{J!=GDsxd`}&Jj8FI~-*p|F_bNNVx$s?xI$;;!} 
zfal#17oTvT23m~bYjE&~RjIZN!oFol4&Z#-V`L=Vy}P%a0qT;PhB_!T)X)3#wNqR4 z*>bN%lW)K0=5yGe1x-)9v9wjvOS>ST8Fpt1S8_F$S(DHKa9CZ=YXXq1i5z zy?kt!mqObTBei{f`q{G1YRPZ?-@ZcU1h@fk9}*+4{lW_x;)(iD0a_Ya*(X~oAF)y| zpIV(iw=P<3`83tZ;^nHAP!W4jjxWZcM5}^1v9OV3J)$MPSKJfF>SWCY zHpX?|JELF1e!cyaa~DitybD95x`9XwNKf#kVibJR2s8-PWFXzb8ETvamLuzWhdaTU$08h@3$VChvFpvggz(7T_7* zl~7c*y)qTX2Sj@fwiUaYM8NspiB>JfiJdH+cjRs{>Z#iDt-<*aC-N*qm3^G-q@{l zS@#v6-)q&l>3Y%hmA(FVpU*mYh$dgP-vV8+mJ+4hkr_h~Hno=zhQ;`rY`7ZnW3{H( z#H)DZ9_KPY^45Cfxzo@$0!n<4u^+Av6@~i5fxG28h*lx4rwbq%YqwF5P)<;HJ{5-p z4uDp`tLL6T-#8jLjavIr!?xqd6JVrGUYWq{qty#av5RBA{W9~+@>*cpgRi%W0!o8l z8t)A_QT%r8&7Iq#&tBJQNcFl&5-A+-*2rb#xV4LHlZfy#sbv;*>9mnQ@FF|C?qHa& zPjcpC`=1JYaWaYtc{8Q7*q;wMHS#`MdI?v`dX0!}yZGq7CtEBoZzbg)kkXRp;H4tD z0F?lXYccLLbH`1VGlvl(4GNZq9-(Y5?--ze3 zXm>$UJRQ3c19ojRc#WB zxEEt(iOkUEL|(;ciF+!DP)1i_hu~#VsRc8s8mt_#Xat1;yVQW46g@ahY$r?=KsRh`1BETW3;qz=iqQz5m9hBUbtwai>OH`B&t-v z$>Ed)+y&K@Kl_>lZDS&CxyL|p*dBjF>?j?H^N9f3?YUAN>iS@W&+#!PzKWT>cH0Mi z{#M=GZH&I(7@Ujup*Jwjw0qnx5({zWVguT3I8bqs3JZL`ovS%p;Idq9B)+Owd zv%%juqKP-jT1=_o;x>)Yu3IFQwZIb;p;XSsmdW)-QBV5|cqP-C1aYEDIoR#Vi0jqi zZI7OX^9&(YI)@Oo?Xi9bMf{(;G?bg3;r8dITx3*PZ4yOWaix@9ErJehsA3U3-XM<%YPs2|aG}P1J_mnbg2wnEny{D**mLE%i$SH&m zcE*6KpiNjwXoU_L;%g`tZv`ttP92}Sh1XLReW`f-mLe-8(6>ywG)PDf|n( z8B*(oZbOI%s#u$z!3vX8W>opjaLl+t5aHzSp@GD>fggQY4PSaI&XH>?D^+txylIp| z*Q~f7Fa>K-?>?~|=-U)9q60fJK~t@|AoX>jpX^90b?C^y_f7;mMXHn5tH#-ygf|4^ z%~#jN-3a z{I3fu_<3P}SKuU%GaF838pFe?O6TnHx%9;JE?OH_6At2f<`ksT|g9TxX&hZp-gK9_EpCER3>WU1YSH&qX~=pq7Ci9)8{@xYw|kX}|~7b1nSl!lcMy4*w7y zFjjVReTtAJngr>hm5-dDoidtx`B_=1EuPO z%$-LBLGO0L2=5}!XmK82We$50_~S}T1rk~~;m&vzw~)8J#a_U-EwrES5_tbbtH-Uy zySezLA)|_FaD=QIb~!e_Ewqb}L6y>mXaFe~PXHe+I3HtTD2<`Uah=U2njux#*6@ue zh3#PbOtSVuhnPpLm(Bhl!c8Mam4`~T2}RhM{Ym}B^*nfpGOG&hGUPQF*C7^-Z3f(= z(3$omvx1iCR91v$-J)c&1l+IPuouRnfMcvf5`XLqg=R1ie?X5F!zcM8WR{MwCA4+$ z8Y6H$^7tQhLrl4TK4{;9bw;UyX-5_UMITW2O)kv=2# zXVd=x2nBYU6Ka%I59~)F#wkLl?qN&`cZ5f6A4vi9?}6753uj8Z-BZILTj)^0`r5g< 
zA>iht8p$RP2@X0GFdYJly^4KweBm!1n&1RH_;Xay57hsTX$ebU%agt}g{9a%2Bia% zq(h+iHO3|Cd1`|w_Y{O$&lXx$okO+r@MHO@NZq=JyV2{#udRa8ln}V+-(>@*++hi9 z=HGEyfv89aYf#ynj7!oJ8X50kM>(MaG5Hf@C#Q5@dZPnAkJPDKlqf@MDr*U1T}Onh z7#Z3;Lhce;+-_H)wTsK3v{r&R_ME7Q$3m>mq>DNc`B46YW)3E*2$-%!p0_}oI(h0j zEZXD`MqtJsM3aUXLK@o8;$wtNJ;jp+;5@4c#?n;;0ZA;v@OsjrU}J&Boo4i<4L&#Q zu>6i8ZCtc2q&R$Tuz@0GlOxXrt&UvJI+R%sZbMCnvf(zTq1|uClYO2w4>VwH1>MK| zI{#?h8LZGs%ECkzlv_zLya6kL6s0?e2<)Xshg8f)hg4D!`E%RgZ-JMa`|voq_7Qa5 z50ie6vVYM-P+D7W{@tqkI+jT9M!UcpcYufO;WBme3Bvk!}*~>S^^*?oI}|JM7QKyG_(G34PLk zhe(ECXY?yFuzi0C{}3K9L`;%?|$5^!;yHXFOzZ!(8*$JX1vr89qrMN%;27K;xf z&)Kc+Ye;gLL*P{>DWu&?p_O)8Qp=n>UX^fevnP6|jji~xApJL{^eY}gzF?rl@1aD; zPk2216aIN`co7Q#Lq~P+FVsVKyKEzx(ZTyc8T(Uwm+npG<0ESrWqPjau{l}zI7U;a3%~l=u_nkd0QU}*ks=@t~l}p#uW$W zrWlJz?rZ+odI9gVOIu~P=}PP=cDP?GRfNZNsL~Td2{R-%QA)54LPE%lDn0H*-2MXC z5=N~6R)svdW(NS28IkXheD8n~Ax6~3HyD*X?d5OLz6I;dBLn;Ayjm(Xq$=-c(48%V z#mJ_}@4f>IWN4j%IJX{EizIPwGOF@INt#A5$_au_Pn3s4a#3PpLyFt~9(W5-IE&(C zDTaJ>bIBMtCQ4-lBCA0!1-Wr_pb$33jYYPj5PNxTs6wmHY7udpOE&59p&#)79n%_I zV3}%egzN|sYe)y!u|w#g_;o!fq9omG3o7n9WHF-!6KD7_#S_(CgHI(KWxMVzWXm$YeZh5P5ow*AXeq+!KMd6w~0VBtE1IG8`??L@E0X9r|!df z_R)&P_tD;u{rA!?q+|#ceBz#R%olv!IQ!bxckivtM8ukY}c63t=)o96PV z{%SN`1Wm^TrY}O|JURHke5;(3D@nYBJx#cGX_rVZBdY|8|NOA>819v$y4JhBI_zh4 z{oV-QU(59(0?QkCLzVt|glI-M(X>t;hL$S36o40~!$N;J84jkUK&y_`qzl+4!gzA< z*_8x_^4#*QpHSXa%iEgfXKX$>0jJA^Byt)pvdX7d_^!s>O#|!xW z`UV0k)5EMQU7)h(9BD{D1!EDw`T$UTWc>ZMB7AMM9=<2XfiF(l*kS%bm0hGg?)S#} zc0WSP1w3Q!oeWg9^y5QsF;|mUM!z#;;2iaz$_m}R|xFT1Oj(=U?nm}Z-STtbZtgU%h zO)qnoXDHHa8}r@#I0uG}$!wGkEhX`dxlc!%WF6_qoSvt{+f3^7hPLnFZ)$+ztoAp& z#RME5suJPlcCJs%AgrsYJdga#fSv~;{h-S(FBvBa=G7atF~V!S&Cs4bzpTwn)BWSr zzCFv1|7g#qK4DZ{boPOSW^GQ|F-eT`{Ti=Q!=-kWZpTIIp23@nCpt|>Muu;T3qYX0 zIVB!;zXY1Qx36s4ZfjfPjDLXGuRya)89zX->Dma`T z&yN6^i^GAe=F5ofNJ|C<4m{)>tm#AiFB~Bo4ZPiW*SeLNKIy8;-hdR-p8h-W@1Qli z7jzB)X23FmoQ{-9ihBaqRDcsY{$8?TROp_P0kWA!nH+`=LW}kRAfB<_aWtlO8#uuc zO=^nY`hcnCIdB7cTD0moGy17{cFkr+t^Cyu9e!D)jT^`b2Z|tP+)q0`qPvtsRY-MEh 
zyX{(mw-o1YTPMDAutH_g_Bfg4c}A7I^FX^|DKe>d*SjS|9k-+mAefC}m7&(p>{uPY zM-ruzN(T_+9;G?d8zF0Jz5Oe_xVSjd?iH^5IH&8n(2xALAx7gqc~aM0lW7lORmWsZT&IRrDGNPtfH2Y}nj@ zxlh7`>8~)Z=q(JIt?qdK{JC#D(Xbe&5_6Cd{7qybQ$TJTx;Ogop-?7uF$PKs@ahcQDdE4^G;?MKn#f{7%5DCpZKbzKqG>It7gZ&NxE+=?56{jJBtK(nmk2aK% z^O3s8#1I6Q599_0foTGXgB2jYKBNlbo(h^jQj_3{;6n;{&%H3r3ggGpFxczQE{{wQ zI7#(Af13&kc>hz|7lT%43MpM_(<8xi82I()t}6L4LmTl&7WiU->Z<$T8hCGIKsBJm z#EhyIM2bC>)xnYi0RB9&BE}E8Q3mNSx`+qib&`s;-6`eG{!sEL^2S&7KYmEwRtkqJ zZ$XFi@j+$O8-T=P&r6^}SJ4Q==uhwU>?2hlxaY*mP*EGwQQkt!QP>F^JX^VfUnM$V zG+c5fU||S&4D7>@T4oW@s38bp#Wcbn%2l!;9b7nXD#AsKgRq7fmad)qFbc;8{{#lt zR5B-tiFkh4c@fVjUiQ}JV>}`4K0HBwHSkPA(3f8TMyg7HC?oRxZaB$>sv(>G5ybZ) z7?e1(U!ON0m3N1QL9sqKAR;tev3y31L)>Wd2QGVU$iY#_;^4boQV_YOV|Y^H3Xt zT@@@fCIm5Zlg|i|YSfI~5+t?t11GE^u>=(zu8@zANiM*OWsBaU?<=jf2XMcY2JH*gvV_+C*g4(gcc`!6`VJTmWq>=p8^V55Mf&g zyhHW35X)2xjtqxWl9$zt67}o@8Cc|3JY);%@(?Rf#Qvqt`<0_G1aDJER}I+${N18CpMPi4wwyW1$N~q$M~69VEQudX*m5y2;93tqNdcv2ESwRPfzK6 zH_SAgbu0?|1=PuYyH7ey*zeHl#^ zASwj@?i54r|3Vg!#B~)4dbla77ygmPoW?DIPr3LBonKIxITP_GWTy&^d{voGCWdZ6 z7aT$j*QISP=0lrM26vhXf_-3=Z23 zgbw=W>VN1O=X!nJ0QeP1n1Ym&#(5NF^3Q6 z?^>Ezz+Qs)Go;=hO5(fz{ey>ZK~(v`n}NHpj>I6WBs>tYJY(E~o@c9u-XXq=6(fbP zR&gg67({_=XX9LcoQ3wB3-C2^kP42Cy$hI4=TI3BJg2&cQDj-`)4IjPk*MRm1>A^` zw{Ou=0YXf&2s(|nA#}sIun#f9C@TzH^M+73NQcQO_F|%uli*?UG^dAp8PZ%dN#OOB zt^lyIVui`{q9Gw6Syy|HKA;b_G=bRu5OL3GhN6EfNw~++W)lHMq4H!Q+PHI|)&Rpi zajexE?>SAG zE1+^HF*;_0Va6MN!SW_k39Dp@z=PCsMf1ty-s=>4xO&&$y(JPA&r4O|pSwo{0mR!R?;-LHYzs+n9 z`Szb+LDhfEV6&cAe7K)9#w;Din5jkE?AOMIyMFB@s?&W1ub-g%VIFO79S#-Q&_%sH z@M1PXvv$k1k2qB0H`%Vo{ham3@=Z-0KMuxTXhPv)UfxwXIvL4110&-4u=>Q8v}rq1 z4=-lotSY}k8&J6k!>(94Sd=IH5A+P6N>;)g>X00m$Mwsw8%CF>j%C7N)lTQ>6G&|J zp!LOSVfd@-|q8~cS16;?rz4#kwYCKcQ9q- zMEmzL=w?CcJ}bIwbzEvQ(`qoB4PhsJNOf9FxKZR=#|2X@oo0t`H8eE1FOlhEv|QK= z)taEGI??>&7j16;Ohn=h#4A3w@|q6*ZXwHVXZTQPZp)ZSt88P!#RJY8XM}Zc2Go@CH-Pd*)pjSaW!m<3X;Xmw|?kQz@E)|-aBc=3^=P1 zjvbiU^5e1b$^V`KlCeO(=1ZBd!JPnB;o zQWCr#rM2NSQ7a~?luJqtQl+fA?c4u?xDtJjY*UUx(81g 
zYMJ{bi&3v~4#Rkq|7T?qAx$Nc9G3%##?`?FXoW)7=it5q4troWDrsZccc>XlL4l9> zi_-m>^vRT=!BQy^z`#NprUHZeNSdf$6s=fSgy;q2yt)j!?!mTaJjnkRtk8Q)X@uqN z?e|;d_HXvzCf+nu#!CeZ*Z)ht`7Ijo03bB5ZYc1{o&WLWN+Wb~)c~CV^RsLgCo0K+ zn!=LFzjadx3$ryL<-ox@7(qPtH#$agel&g!E?4aU61TlHXGS;|IG`HByooIbtYVt) z9HoP*@Ej62VFribM|8h65IPhTS? zro0!ko|C{e9*FcAO82A#2z#j@-}k7zjHT^E9{&J})X7Hak#Oe zDw!UpN6v?cW_VO$`<%mIKAWw78RY%ql7Ue-0QsAFli1)0#wDwUM#vhF^i5HqYr&>; z!(%|=P%aP*?0<%+A{NnoNYPZqUt`D!Bcw`cH-lBRB8v~DH5X9I8oVYA@gz!_d!J`v z97ZW`sv^vof)eQfqHb&Y$lEZH=RITC^FcMapi+;hI`z8*0Z?8g84?p?HX0vIfP-16 z69>n!GkXZaQ!dB`Z9Mof*rWyK$#`2DL&ao+m}%TJ#?00HYjQaNr3O(i`BYx zI=ElHeYB_HJQR0FB4r4CWNiwjM;caIy5-f^XZ%?E8TpkrlXztAbfD9LV$bQvhiz=0 z!}`(m{qKp3TbLy&sW<;eeR1G@nhl7F(!1Vq>+M_lmlhwWd5-!QU(W8hO~vVqP5VPw zDol{u4!voR%N>CCyk};6Xf;$y|4nLHjec~KBBcgN>a39k5@PI{;EG@n&3|9NHMq^5Iwb_5Lj)Y8-a?_U?}u#qn_M20{w%<&d+mc( zPG89JEFkpIxRLqM7xfcp@B(u0UtXA=gng;|Sp`R#+v+rDO}q>zG$WV7NXFYIWmYzU zJ$D79qyDgeU6|-DQFYtX$NfJby=U9_gDjB54AWsyZ~oiQhj%IfWTdx;UY=Ei+rl{k zCxs@>WfrYUK%49xXron*X9h0>v~dE3E3c!o6)mUnqBZ?S#?UYSBaQV3qX%GU_g^c? 
z;5^#0uW%yz6&uX741e~(jQ8?FmnBPSv7{+b#_UkSXgorSiFH$tdHI|Ya-25?68#c* z*-v?;)7BWyDn-gFU>E(LxODRuK@4W*43As4Prdse0b@TdJUGnP{Ga65@Zjh7?Ab#D z?c}PQ_+0=0fil~MLz1%T)_)Rbe-`4k{QPnQYG-nV$sXzk2Cpp%VD{MX(p&>E#KLbT zJmiikS|$Itd2~unNlLMsb^o=7`^cADUaSVrj3*CGu8Z1#S!ekb*eUkrjXafuZf5v+ zYaoJ<0kL+z{S#X%<)?`keqme>{!*TS%;*n{KKl5k*fGkiFeHe%k^DQ3$53?FHSv7~geNd%>k&*9J7gl>2T^2DB?>Kjnv zb{|Y3S4{gpR@OuWlX|2y*l$k+I|5iRaZi{D+2y>N`&=w7adX zK)#QMkPlocHKx0n*IDrc8LWHHu;>^_vMW1D!R7|Vh{119}v_kc!-FEn`;%<*B zfHw=UQNpW4#r;TtXO-~%$PrD=O+f$OhN%v{pwM^+HHsKx-18YK%8@?Mv8& z$*%_gUnGCV!Vn`U0J;3LB>wOhVBylwfs;%iXCP_|gV#y;`dUMx+~o3feQ%6y-PP_v z?`0lP|3PN{4^FjGe{@r&(tidnU+ODzg&ZM)w!inpVn`)J@B(PwwSqzO{!>oRPnaR$ zjlcJ`LUNh1a^#4*I#Z|Ph~S}>nPvwVL%QdU)Q`3cs+@emXZ|c2%ww0NR6n6iHo6C; zo!fW((!jI*v4NDaXVT%2DyO^x*w)-VXO#=G9RVCp(>?;oSZFF$yJn{R*RYB^fhiUg zuTLNu#PD!B&5CP)Ii)p>Rlh9x^dFWXo_v$FmQE{&O?BH7LZ4J5unmc?Tdqz5f_$_( z@hQfdJJ%-qz_(S2b);{rkksK}yc`IAVI&mPvVzBr9vC*s1pYSWsK1ae12=Z{^rfP> z(BS(XKFL(v--}(}zBcn=-Td_VN{7CE{a~ne#k=I@KsMW580)-=)CCItbpRafij+eN z^An}4ODE@NwrODPi`q!xke(wS@5&JsgE|*a&&{2>oEbFH?MdGgd%aWE&TcOXlxQEe z{Z^APvES*30I-3D04JCa} zd$h%%0GXX!+&!Mp02wRnCA5DvnfMKS2Q;49saBi@ji9 zFY-0k?8A1cJGd^!Vc`zUkzMniTG_FcSNBEkyO}Qo^b9}vEbqXdo0cVlz`#nPq&Xw%JZ|J_g?Z;&YT}xol-X_9m zj(CGI5T%&(sg1H=hTqaIi{AWnpg|OJKjAX41uk+&WQ;8CD5(5WLVi!n?a<5U3vI49 zd-9tV_hkS<&7KIA{y+Qv%=3e+Kb|%H3VJ5nMG^01qyLVXxr z{2&pVK1KA3=FBu57X>p|7{N`Zq@;Avy8H001Kd0R2l+SW=%!t5|5@D$pNYc_#Mu5H z_TB`p#painJkI0fnJTsC2uL^# z9KMQZJNqKuILA5>A%L_~MDzU!1_R)SwYzH$d|@G@>G=mDFKCPEuq#wn><{U7+sb+^d9{j=)A>u^2* z7IUKVTiT|Us_cg&facd>haQZ-G5Z!pMHdNHv?e5rl?)n;aP|~wwzo=v?`M!lK2>#T zHBq4CMS!&gQH=rW1qmK6kWHm)XCnhbm09)VFFbdbdz6fQ%c?LrCjw`nS^F}lWvj0K zeZ+bG(~XDSa{n?n3=ikhrAy-`O{!_%G0y*kt@O0#Ok=p(Ak}|Cy!wirYGvAH5$B02 z>);6^C+e4*#q4GVvQgY9W5ZF$U8O-W%QNosqZfv4P%) z0~uJUKEc6_U%o(l`UJd7|9jwgh#gl}-B=J!YWq+OSNrn!Diwd?$+~T_yRI;CWGJ`c zD6-CPo`f+$RmSIrpNlkvkK)m4>Dx^L%cl)mBNtKIyz9+fG4a%{o$a6AKP+$dQx(7Q z^*$j~QVIp>BLpho5zRL9LV^=B@jZvsL-|uTa2LexjN34s+f%o7{ol3T;y9AAMxqPq?w;VNikNY^` 
zmtSr<_c??a0ZM}#J8yl`^f*UlJRi*UR{oIxtk`*vBp=f8WLWnOq{?iG2JS( z)h-ZTlA7YtT;^58dZnejgGW_T%w3_I2wJ>8+_(Ln{k)(I`Ex;v;kKl&=H1n*{VOJ> z1(I1*PbL2`A-7&W_v&db#qtGe3|v@> zDk(Cfzsk6?@o@Q*YC^^T#D}6$@H~4^G4!K$$BN4$#s_3uS%MRhraw3kzd1W2q$RT< zSQ2D6V!yU+wL3a&FTssSM_KUF(uryV_k#Ozx|ur)WdsNH>xzpv7x$X(e$ic+1OdCT zyMC|7na#fDU-3EfFX5p0PT;(GhIpM_VWhPUNSVVZHMOolrC)|z(ct$y# z7rMF*Fs3bGc=C#^bZ~IEnlFvUalGPs#}Uk3h6dMxmgKsH=pMgXSr3{9!8_0;m*!d= zsU_nedPQIhYE;{79J415(PP;q#+{6t)8~b9W}_@6=J;9r>%NXeEtefy5db@nv9!QM zXa{U}+5Ll(q>%_^&LO^6RaI4R|DX+H%0e2M--e}ckKm}Q!l9@LYXL4TY1I_9{lQcs zLy2>#t@ixh<$clN7H{s-PCgqm!?TFZt;l2yaz7|KhspEsG?m7Qs8VDHV}D3O^;86U zLrWvQ!zYPok|f)}w>o(O@L#!pozTxo&&=AqP|sPa0Ts}qQpP#~b#Vph^H=uJMV@r3 zcT666ogai?n%ec#A5|ave)=Owe)z_9*A;5PiP540M2bmr@CvKo4ZAGL?`*p1UV+9A z=Xs?5*+-LmQ~(|o`J^uR9#$XA7C9o?)lRma_0GB1B1$iL0-AnoXe%A)&Ucw1w>`{7 z8z(qLu(WM{U0iuth(C!P48Q#~Z?t~RZ2QrqfcozN`yN1kXBDHZGZdyW!xh~Z0%K)N zT|AOB>On$;U*_8L=j%y(Q9``F5gWE_A19=yMu0jxjr4eUe0)3)zfi-Y-_Hyb-rgdZ zj6$J)loO?(@Pm!z7aqMclxz?uX?^iq&oU+5O$wZa6#Ze0Mo!*-U`Lx6)VgWwoSFe{ zaMD6n9GpNv=bc7IFTkSJbLs1#XvIN6$InTcUV^OL0Z<%ttdc_hh1|jsd6tO@!Jp=( z9$Ybl6VkL3!d@nb?Zixs2Xh2ov^M!a!$*uszSQUgTWe}iOs@2uKbSbH-hpeKw=nc8_b4PMSEA5F z?fOQ3qMir{Z9(c@^bjIg9nRqo732N3{Ud0rPWzRw+ry#fhsw5(;66}Acbbl2?2fevkBE@=rsL1pt#P+5 z+CLgJu)jq4@AiS^d`4^{_ms&KE6)io~&oZfr_Cm4Il8+9jDN0<<*wg z{{1un3fG`jnDQ)>f0-2x?FF}3t*YMt4?+>##J3IG*}^Pmmi|gC*;fyg4Z?B-RD$ZK zQD8I3o)(aA=~%>MGRxwPxzP$eswFKe;jh(S)U5D1p?@v_L&Cyi(=5~AWDx$r%%w{c z%li=aBsIm{BlRud*5r{@@&f1pTf3%EnxYsAkNKhLI$9jqlE_N}a6n!mE0oRFeI8zp zv^U&1q8We{Vfmm#Q_r#O390cQR6Y`o(BkZxG)bj{>4k%{14MP)R2Mv$h%Cp03}scz z@I!+M6iUf* z&avB?U<;0Ss_B2s0CB_l((n|1)eLabLAW?NR1;5$%8&-KSDVqioU~phU2fIY)#I?I zDUcZDlZ(*cRd~)EBT%;>OMdO8hf4L*KSciz0R0#LQ}o|KD18Dy|0&`V9v(40jWm*K zihVf!)~#C`1$Z|r0L&&O|5Jyk%NHmrvaBh!6bSV72b_;7w|o?YkAhA>cqOo=L-b+@ z*+AoF#z9DN@DdCg%wzdqi+eUto{-vA#Z3xZlRli5v3j{-sm6hOXh$6?xBk+6l|N9t ziATl@3M_0v0FXn5baK|to`;T8o1$Br{HXFIEE{Nvy-3_^0B}HnbZ$|Y1*k@mcC)-~%K`@8EVy+g0L5ABr`4%YlG 
zV>x^q_Sba~X`E((`7C{f6?b!x1~SjxpRIN?je@vuw(ol$E{#tj&MJKPHpIu37Y?}U?+fY?0VFWb5@m5&ot!euejFXZ!q&>Jk);3*Z>t*G=Ru7{H|coldUOI%dau< z$rGS^ehgW->to&L3jP__Is6_*_J`EUU#solesuM|A2=PG+A}&*D|SDo`)#K+hdCvY zvQC03XU=lGMpc*Jc*+QP%1A-dQvJ!_(%-@9NmdL}C~2pbzB^8oZIM1m$GLM0pv36C ztClZ(FeoeL+D+k9CTFN}hrym_cRHXzWsl6|{J*2mgJnu1Q9BfpGr?=vhcZ_xWP9k` zH~;>4=?#BRx`#M2!_S-`Xc7buek&sIUA;#cxDLRTDnx!y;V~5?+n9PyS;Pst%jME( zzGrX*OrU7Tgd7D41WL;D99 zjpr~^Zs3~@%hXiHY3{0;{}Z6~bmy)e?M>4)YIIAWof;lU!YeCyyB6Z7|}{DFGbqxKZv9HczmERV)Hvr#}i z`N%&4tntC4^|{&Q1l1xxG#QAZ$WUJpEV{Mp){&Ge8WQ3EtZi)eRiquRiVeKq{Qlwl zhKyO0{{b}Z9m;u5q3+LRh(NGdxnsvggQb@YU;b^SJ~xcqu<c10h1)FE)v(B(X%a^{WK zt`=KD<&K1>EN@)7ZM$+EXOdELEl2L}3D=9q=u3)Q7G#%vC_F_<(f=WIX~zZJ5>f#V z%clco4x5fU0GRn&X)lE57osiq75*}Bl=rtidZe*y!Zd^EgBzcd6#D;!HNVRTe3y*h z$R|7QMs}~clm6QzD)l@!4C(dtz^!M8+>8b9`?a3XMU6>+2DR+%%+GWTR`^6I{XbT*|VO}H*=y5rN)qJ@P>t8$fHUn2*kb6$@`aAFDmtmRX$L4zWj*D*zl8b)!Eir+!D?3p>w!fU%(h?U~R&P}~e~KwL@nOdfS&s>GR+{A`FxFKU zaBe(uE&mC*a%;4%KSZC!>#kS%^2=teO5j5ad+peRFX4Ro8iH0HRF!xdx^QeO5F6j`n#97H!rt;ak~59Zuw*0 z%(LIJF5WnL1o{4^G_J8xFZ)oc;Zr%&+I`be_l~O4p2+xF4Ntl9_uagb`;2ix&1VaZkT-7 z(P0^wqLjhl6xZ=Yo5YD@im9x@Db5I^rYLv6%J4Sx?A9i}o&d#cZ@ysNwNvCAlR)nh z=)Ab>$@NF+4sB+A8n67?--s(O(!fFb3GONRjql%SZX^LWw~9c zEm%kQ@)n6px(PL*7xt6tx0w!~fit#$nt`Yd_ttd(y2s0V7$6#FdT zZpcWx8^3d-iuiQ14fiQt?BD-r;9Oq#DjqQ$tg1}DrRH~94)zPZq@Dt9-G`QmEh$an zr}(7Rf0@0$0Id&Z-N@q}Rr%@AL7Tcf4UnwcQ7K_s<#W3+Ud#C;R<&hffLVJNzg9!t zrPX^v-|NirO4%&-rpDCv>9eX!WhUk?Q&b)Z96Gr9V!uO%v2F^>_qF}UsP_g=p0BG- zzkWK^^SmtD=B3wkPcuKCjJ+)a#!KA&8bg&zCCOrWH}GYiiR9ek1s-zLGjg;Un2RUV zctOcPf-G_LN4NnA5xbTVQ(0XHTvx@b)82QN4?~NhNav6N_x^^v&7t2bMqcNKUN}WJ@;hsA`LD5tMD$a1&lWg~hUfj%(gdy3#tz5E#;%GS+a!I~ukEB-4(|Mc)w`U8 z#XL$RwC@;hZd`S#nA&s;J$NO9-aoLJ@2N@lvr^av@29WkF{ZE3j=lf+q+DP7mS!Lo zH*(OG^vwaY^7iDOfMmJ*QkG7_LLLI!_r<+ptUH%;F_L*U>*8>(H!+LPE6$UYwv~3D zAbqC-Cy_+Y0dZrsn=U;1`D9Tx^*WxlUrTFDhY;kegEdoPH>%Rjn4-FIS?lGp3?;Om z8#;I9&Hk)NKa;+PuDo%2lM`N!Ux5bc=tg&rh-{6Sx_E9!)0HzD{e&k?oOnfi+nrsT zpDTN=-*+*@?%^omioW08CJTN%~(P{oZ;%XUb)&z$2eb!M!@ 
zuJbjFyG@1Zt1zKERV@>V$QR*vc6We$uHuK}Vql6nDgh1wO;a;DHm{OVtTfb zwEMX=^hz#?`hG^)8v5I?N=Z}R3A8Y6)PvqeJ!lxmv?vp+>groJnxag6o(N78HC-68 zL)txLT*6^SNti$LxWTw#{{3h^lk+CyYh$s9oes7~m8>?)%e%(h!lX;LP<`0t7`>eE z$2-UQJ%POYROXSKtZ;cr$0z7crMfTh_N$Q17ySM1wlli&t?4=WaY2f_?&elP0-K-5 z1(VC;GUPHuF<=7iZ0$~tk2Wf*p8(waV?LQ~Dr6ONmUa3C%$U+(Ek%t;4($H0 zlzk>4gSy)~I^>k~v8ywt^xKG$xtyhDIFDQgP?xjFC7CyEttCp9*6E%!EP7nSSSLzP zm%}E;aBm#5ehhbu4ChaVTSVUIM~(4&Bl}%Mx#Va!1+XJe^r}?|mU0`4%o1C@@Me_* zu|$UF$CBgTO&^Ch4%*SY6I>T3>*lSI&R92|@i3Xa6O$f2!A{IrN7rGjJjhyjfl2er z3FItWNABY1tndfSR?h|0C>vmP+4o@te zxpub1E_?bn1$uLytAHbU#)CKP!QeL(bZ^U}WufsHO=8L?1}*!85jGa8AMzDG`zUsr z24xGQ|9bt25o zw6&r8;c$`R%2uGCk<8G*dl$7npz&-TmZ8Neh-R*xE>T~?5FX34HKWsd9T&JNX|8*1 z_2WHnFL3l`h4)JOxgH%RxKnd<#W;MX68&w+;c7v5%RB{ZAH}7nw78sg9DMOxhs|VW zg};$B6kyuA(9LJMJ3n8`iCaBVN5oZWVtVUvrtKm6$C>Vy`L0f^f@UV|<1I~^N7Tgh zIB!OZJl9=cMR}7naf$kTFJVI=j^oFRIhU;(&nO*wIg?Sgn%)=0k;W5AHu-qQy{vEn zNyEvcM$0_jFTHB@aR2R=^W0T*7(4NH*AD9KTE2=8P15i!OBrQK^uAz@G#7k}VC{Ks zJH|5G&FCL*QK}2avIO;v=eZ|_$zOTa6wO*V&r=b%N+;kfdk#K|62}kt4*nP^sEZ+N zbufg7KcmEU#^P1<%BiZ%sFqc`jx!!f;bh*r@Zsw>zKSk!J~YWeK`QXp6cwxy_ie?^aOy>ku-D$4~aqU&@Z3I>Y_A zXAPsogwC{`#yv{mShpUnmpZH#hfzCrp!-7l`kC&>r!qx78Jt;51fIQ0c^)Bh(r{Jo z9=h2fa(Bi{d#rW5*t5cy{3e= zo?1OIUlKacDw1|wevn7}0k*Mw)<~>Jz+>uGCweE~Z69n)L&UhRh7Ct>{Qkq{V4&!CaDC4+Y=`$pWV`4~Iyz^NY zj2_A4lRg;LiI-iSU+nW(ss zbgN@E4C|FebO~)2O%vL2kH5Blj|MtlC<`Q-QKFlCtic{ zQ4Fy-a1}SA;nNk9=RxDs$Ilf1*DdCw0sD+GOY2XH#1x#Jw`HrEnoo(-n$vtY#?M?+ z4^~jAkO$u-(8;s_?cYR|O*o>7<@ZK-X(y>uHbP}up6h_#GpvXxW8xs_W#dwm@)h*5 z$gAX%CZxq}_+Axu@k$OHJ5jn2Q!kI~ijOw3WL;~<`Lqr+KOE>!>1mAYKoMUfK1Chs zoC%2}dcJ(GB}KLQO=2lty@~(VUGSjYkLV;wnU|@7d|Lnw(aX{YRy|6vUh<3?aMG|V zKYWL?o5CTc~;wG|Tj?Pu3o^odd~1*wNbA zf}V;?lE~)PenwrMC6_t4r0FeijXsOr9dWXWGQk$`@OLg6xxd?w#B(CrYa`t8_#4r8 zEl`->@2}3A@{cDw3V!dsqiDR6Nw)IuT=OK<{cR>XF>PbL+1zcdz%k_ePt>I!P;gl_Z}3ri#%`v8l8Z=ZM62Sm15iOM(x}F zKGjL&fg|p9MsO3FS;KUr``GOy_jxFgj%zVeZkSC`q%FAzc#LqxV_Y|e7;+^{F_;wKUp?m+sdC~k+6Q`?it z`vgX}KYW+wA@4nNdhp~WNPdDyFbm6?+^FfB)3vtJmh+GV#5u% 
z{2H0jlD%z`oAcwgpYY2#>G&*s>k`B^NgYdMHR3AndC8@kW$nKj zhYZX1m|0fkKv$6BZdD#J)3^^M`cmP_U%c|Nj{(`J;h)fGSbS5%?+Un#8VAqU<|aCR z0e4Qq&uHb>Wc9ohiOUuq`%_x4OXL@v`+h9 zILs0nbcF0p1({if)+R|O?bk-EwEN`Knl;nAe(A&RQ#>nL)8@ZV4hQx#A2m&iiihu_ zq-wc<$EH}@3y?qu$NmZ#KwEv zUvKTV6xIZSaQ2*NO0eRMuhQ*eBv1=aB@vr?vvs`@h5a~L*E`U!w%gR3kX<(C)2 ztqu9^T9^E5j5zfA`6sLl*dF>+WMyCA5v?Nurt{nYbM0`sEv4Ie*2VggNMNU0lMA;hXiswp!zP}$5jF* z4koM1jgcbD_0?A~(KJm?qW&v`_Ej6+q~=M(GI+jW9B;n+fb8*PKE}BtX`-R69pUEY zHo6pNL8jjo)T>K1D!FGw@scrv_9GtcA#pMN?}F1`%GE!q#OqhzDpyIMP`K?NE)&| zOCfjXTv~;^$Z)F-#L4ww^hj}i)u!Y@;h)9CpG|+0FQ8u2p(zn`_K6_xgvMP(5u5$4 zOxt^Z^FZGxmo0%0A-tOMDYdr<#HCQX5}keg)A^9$r@Z3-+#_urcmP$W*QhvGf4IcF zOi;yG{1Ed0>WT8zK1#OOwPqlQy@nx^vaMI+KUnQMb)lgvR4K1Onl%EqV3CuQ)$0GX zPy5XUJ*XW%J?K<_=qBiMMIvw#FK<9{e$u`Vi+4O4*3s}%nV8XcPM!Mv)>%Rh;Jjiw z1yfQ-Ckdnf$ZI^+GY&>P`HByldu{-vQCa+%Af}vH0W7ZnMk<@miMmL?VRb} zVkA^KFbxQun_$G(Qb1NB58k6YY6L)7vsdxP!OQJ#@+_-xt$#>c_N0`vER%%1{%uku z`X4`8B562tm0nf_FKe8_R#DzGJGgyGKszib2fk{jlzYPkw(o3BHo2S{p%FTb-}6b(n#F5?7j~TRCZAD^#~|sL3Ft(b zmmOorGvnYTVTb^hjC+XJtqcn7GMg>HDp-=kG%vmC4mV#hZHYn7D_hepZ9qJ8ylW_E zXDo@TaS+S$!m83@ffb>N#!P zSb>~Zub;OjAedOApBz9uAYo5L4~r#61|sG(={kuJA)rd!^tSZn6(X}N8kntNHH?5T zAYvG=VkPc4@GxACvnQ9*?xM43b90{zF0F)`on$rm@Wq%A7c0Aha`B&%u!C1`9_8g+ z8DpB}B3^?vP$H4F(`QUfH7?P=3YX&KQWKZsTzw*k$9*EZZ>yo9Vx_aTaI$biCdDINm$)y-|xsF_td2{%bi@SzQZ5CFZj_PF+bwebFlz?o6tW6h7+vX!#P!!nU&-k+yENz3TvNam90VA0 zWjvMPqOc;L6>_{J2E>9P=*?cb-%-ZvFpflD#a>Chr{s-@!{?f+4a=}+6`W$O1+Max zAv~FBYeaXgZcU)^=%C;)Um>30H-ge+Vf!#x3#BFXfHscfg54%S4@`FO;cECdxdetv zG@-@icXBRcUBv?U{vH516n-;v6dvddldBZAw3Jnl%UsJNQD4hoy9Km+Q4`aI@Syz; z4;u7N)3hZL^_gs}$0vcZY6kd_K|4>(`vs1)>1J%Kr+09>v>VYs&VXI?)CSyzw@;#QAY!du zWmyHWOxib7=!l;H?)cS*0*^$9nl^!UGy)_J^mZ%Vy#H<;{S2g+aGOy$Newav4L^=g z9{NGMWPFvjF%U!Shr!=KnWLCnXVV+mK#vj5u;E`KA7J7ma5nMIil*+`B$f|j#`d8f zoW}#MawaXA>sC2hmASr-n!qF+qAyPrjAI@V5j9!(P}O_t-c9trG|XcbNQd|`$EOWV zftn2F5lxE`J8AehS9b`^Isb%l&4xl{mPRt~SHCkYOaWjgK<0rAUoSbNE=#1EEPR2tj&(MOL+XUDN%12cR+4<|f 
zAb|idChQYAosLKNrpE(nGXHWhBs#|{1!r|p6g%$+6LmWzJ%yMk_XTVdH5n74cWS01kpXI56&L5?)Vgm}533`z^)14OotqZPm*g6Wv^N)V*kMTg395rdR%GSB_Pu~T0qkW*O8i{q7SoSCCj2hALE4V3j9 z9M+cD6S_8NrQ|ruW>Ij}yE+(}Ggld>r!9F63aW`A(J zUJ#v>rU^*Af`yd=vq>R2ylkY2vZjFMzO{rB0BV?DdY$!>o6HHT7slt*!Nh1+e$R?5 zJV@*bm6@4Gtb4jH-IG^T0=bVM2HRj#&=VV&aey5iSypEwJxzo7uWRmh1>*%($LIKr zn@DqnR>}auT^_HE{dD$@f37P(dkEn-%xabwKig&!^y#inp5Z-l{`J-H818oK=3SG? zAgbu5Ya7&%PVJPw83;=E~c1TTybn@qsRBh^;lvl>CWSqtUJm=)_Nb27_f8CSo} z?`1K5%D&K!=!vo2DPM6dYDAK1!Qzk0@A&C3Cug$ue78#ZQj%FwrM~g+rCvt5R`2I9 zzer5{hF<|dak(BphMPQS8!F`|ldmR14flH^k92J!nFx4}8v7_5X>gcyaim6XO<@hk ziVoHzI3~d`-s|O37=~WucPn~vg~XJ6(&d zv0chkBp3#BA%e+S;nR?Y@rY$-1s#ZyOkqc-p@&)6_vb3-wEumG>ME*=OwXl}09yo^tL_i|7&cA}HzI{ZxyYZHEhk9>t5a7Qn)dA62+pwi zK?cpu2&{27#m|Lo3+`Hs;85^Jg>edZaQ-DIj%R3TKO4+p#KZ-p2M^{(0B-9$6dgDu zQ9lBYOjL!s7i-Fgra(;w^MJo#0o>fOL?Wn#vQgzQl%a#Uk=%+|opNVN`ru5Ed8GZ{ z72kQuO=k}C5tH-3oe;WN0Ji=lVnkEKh-V#hsQr4Wb_={-Krh5G5#k+90m&7t+mwUc zmBXepz#y$(!cIX1lAtCY$14?!ZElrfo4J$TZ0k8VZ1^E!J=4~)kE4ymk3y6vhbZyf zV3bIiz61uZn0B=Va`QfXsR2kVhr^n5vy-B~jT{TaaJ+V~Zm?bOIi75@NmBCh?BI;D zrk+n2wf3Jr+L0j{B#DvhGuO>mUk)GlX!MV;ESu!A$CivX%-%?>JA)l(o+CCmjxv}V zV3TIM7c)+WtSEsURZNo$%Xt++CFSCuloI1vWrWXAlai=!VZ4U}Q{?-E@^Yii=^xoc z%z6cjWb~13GlR27Fz@7A)8E)N7|wvl5SRPsJON@r;gANJppr(OYl(JlMOL=amQtNu z0&>G|T9#o=mloQ?#f*{{k)%D&$h5w@MADH0476ehMm#dR3Jr0QXEA1>Wp667n-N!& za$cF|&CLg@>_Go!D(zlls{Nb-&3y=HP7VDylw9Tss*a+qv$;k4noQ1Z3)G#EfYAex ztu|w&L{bPz-nAiuj~K2+S*LlsGY~MUVhtQiV8l;hxr>m?C)6c3xh%%zFv03QYJ@(_pc^t3n>1;$R*upe}tUY zL!;DKQ={w{!}KzxkDKA%24~5vB%ay%ykS5TQry${zUj5u^5=| zNO}de`cuEW5adAy-v>k$&?5c^DotCJrAryP1w(9xA=<(Q=&FZa5kv1dBq4*m>6>r= z5rH9Vl$+r3UO}}4$XTxQL#>!+koOf6kLS~WK^CNVUw3bDQ@`BAG^b#@$kK1KY+`aS1n}n#5lIhpsb1?^b>q;(!>!Ahk67$sil-b^p z|KYgDM;D$(B)~KrlKm5(zFj20p!tn>ciRYk$Is(?_EoO1c(7s9TdCc9p1waRJ57?_ zzU#up%QjooZFV8R^c-!QmiM4AB|4T$xz*V0qR1+%Hl_9*JeAl%@${g+5^Wq8VVHU? 
zf15_Lh11VWz4)wB&ZN`>c2j>tL#f)VrWTGignE#ScnQ@;iFhxXyf&CA8PDpv9=Wjb zj>1$~01jXpL>M~jCeCrUI!Y{a!{!go+2I1jk@|f|zHP3!cnP$fZbH*<7Ze~;aO_pd z39ovr%b~Xo?#uZE#x*uQWU1#Mz%j#8lcFYHv6Gjmmm_{2psKI8h~YAJ%q1b;$$wGT_9&zidbF z;@y5YYNvGW5aN_Od)w~ZW-WY|;9^F}R|3(@*aOTx5a9T}Rzh|YbZ>#V&JujjZ5T#A zV{JbLS~~yACzI?sz#8}JE~(5<+lvG@y0cvMX zw$1h3{ck&phM&=5V2$FqqOM2~0{02bm&p3R87EmO3yqbvV7h4ZJ2YGktdd*BI#JA- zv=|X-H#07sN;aHN$=gFwHmvCx%mB%(QJbe_8TRRb)G%Ex;C8~fN;5)WOtD6W7&K7c z$<*2wp9u`L7512ERlYB?!AmkPqBT-Au-@%R{bT(+6Pq;$K?5uVEi{KIDo^o1;?s{3 zN(l8)LM`Odny#~srWWiuNaEU)1NJ|l8EL&-!dLN2r}7+Lz{;3b;DGD~Q)iNMb9|2Y zK7pwc&;1D?Mo|sF)%_ma|5KWRV`MU5$?ZS^XX^jRN&IjT2R$Tz;vsDvmf;dZ5)%Pb zV&eC56ys>w&1`a)S1>uup9q%K{Y(;N4<}9{7&u%RF5(0P)MV#5erM!yw7=wrd| zy|qO_7X>3>T?9b%7YK^dtK1r{QD?~?Wz={t246N1o7 z=P39enhxFN$u`C4So#1Z1a@+H5qc8#JIg(MG;CrWH)eWEZxZ|RhSk1Li&w1l7o(P! z`JSG|DO>Joo7(29#!lR>sU1bJA;60G8Y)x&=)CxrH$2S}4$of&-#X-Zfw^qCy-xl` z;W4`&Y*{0af0ePWp{AgY;-s&yEdX36Rael-{|~>#|q_dBRFUi;dOg z9-Mu#OIs_6k*s~-bqh1?d zzmtCTbxlDg`yo^pfLGQ~S5Qj*+EZIlGxWsRPNPc2%dinBImk-nOlp@AK2qgeqkYY^ zm1k7{?f#PmpGl38z~sC$9b0~>Zgqje*JByFjsj0&D@7O~Jdt!okLeB(U*8?tD`t?Y zhYbJrau2Q%$=hkybKq0V0D8Ag(r3jF@-giz7^Q(GHU37GRIf^lt>kUD4U?lb|B6+DSKjWML6+e*gr+@KHn+4V^ z$!-BCp$I8##_}OL?GSKnci>(J}hnhk1J0Fc}hC zgPAEdhs1RNxiqO;IGotDX1KH(Y})WMb^2a;m$m+-+p-PSdEG&ay9}C#W+tDF7kVVz zynK53x`*AldyBC32*U!ccux0xZH`2rD*2X-BV6cVOJLBc5^L$>7YzO2SOs!-h&Z_| zaehN+oY6e@V)jQqMRs9yt=1tKiTbgiu+&d}*mYDqse;o2_o@d%SnXKJO@ti@^|Ba< zpXduXwjN%-|0DdT873+eG2CkSZ*x{RXfb2J(|M#$e<@92iANKLc-EuYeg)`=ySQ%L31*tCrn zhA>5Q5iOXC^ zM2JX&%l|6lopZm>{Mbal!4w-yN1a{kqcl|JLlyj>)@PpUOJfYd58?uwzhNe79G601 ziIJs9d2CMqwr4LUP8dI{|JPQL#zKb(axbdZ{B|B9u4bO#PTmekx7|c)j;c%+xi_%O zeNe5lAi4A0ofa@}z;W{#o{AJXseO#_ilyiq>`EavCxmus=Pg=BNc-dI#QJk-|7xgtA?2HyzNj{1|f2o|k&o@$XIPTl8N_|Y)+X%O$?M6s+vsVw2 zPg^mD9LBg*y^R`Uyi^#;xl7c~2Cl@orN)ja=lqINQjh$VVZ08z(C0bfSv<*PZ=By2 z^?V=pEz(au9GYmjM3D>Gq>@fTDq*onCCf>s4B#2cboPQvco^&}b_r5^ZXKIX`CFoi zCCSFjQDtZBm+7#xY*aPv6N=06nIzwI2m$DjX)GC%)tVe~YR@rLh<*Gljf|A$4;IJz 
z>G-w|ahf#M{S>tX8C6sqJ*QG)RDR7IDXfJ|cI_%jyK%xD&s(EGJV9TvA9Y8EZ~7|Q zn|7TBPvta-uOumMENSRL4I!GnpDxWb@?fXfPyrln5g=I>qdI~RHeD?M@-*bN5)g0W z^c;$&D1e$qivHEl2qXEn!~ZU*p&<6;Uk5Q1#QxEDjB-cz39lCoYGgbFg`bT0`8DrCJmzY63M4JN(u^39!hqkc+`xP3B*}z4 z*h#_wp>k7f3M9=i>=VT!nMRepQq>+q)vl!YfVS#6n;WqggfmGWz8@4w+J0gPQEdgI zI%FW(;Uz^P5dvp{kVu9CS2kZnmdjNZfvOLoA)89iFr4vO9BwgZ*1gKSCH4X``+Bf<-F#Eb_^TgS(-IWO~nRK`+9kJgosj*-6* zbIE2z;+@45<1kXxxb^E7kF6wKGH5e%{`V21tLQp*vQ6Z-Ob%5rIY3{X1ioe-H35== zxJH>wEg~=wI9=-91xC+dJTg^3>RyarIux{0;VUB7e=E~4Y#dP*_!rU%v{$uC(%S-= zcBoW{(8)ElpsesE6KQw5b4Y89FkqV~abluAKVawVk5#)+d5NXmR7U(E#jjs@*;^U4 z1D=~89$;ED0<{A>rQJKo7_;6}V;qm@CiQdR_u_F#C#zaRp2ka8K5mcS*;IQ??o-pFD@kveK8RAEAkNy*l40Pi~z35p< zSQ87mIzXvNto(*(V#|%Zuq?`~Jos`+6IQgO_MC8JkvZgz!>Q+)pFjCk(zs7v4 zq%bO41M^{G;vRixaTtCkg#+#hRIVFrdv}VK!U7?GA(L$|sUiY4#l8b}bX;6M$q-QG z)#hzdcp;!G2%oVP)vPiZ_Sk54EVQwQmn(9fLxw$s%3aB<>?tyAn@Q>jEI?k}ufIG9 zrMJV(za)fgus`Nayn)YPcOYlg4(U~ln3L~1hGI~b71I+sD^r;7Z|D9%QFQV`*6STRxgek>OGw>|S(te2dZ zd`5IAE^rL!b69*|s+tJzlmz(HzOv8HVUW z3g$FQ6%47`xpUwzuBWr*CJ5(Jg3U-|8}S}UWFVNlNeWVCZu?P0$Eks&NM0*H_o$9I z%eTw<9b3~=prMWy(DTPIC00Q(>E)48zff#2^f`uro$qSie^4Wr&blbDcKTnH$whdO z;#f)3ydrcmh$X2FZ`rRw5N?uviwgMpqM}9TOfN7yg=VP!Z3~-p*qKBVre=~ErGy`E zSEEauEgw;FsscuD4H|7!ijd;mjALU`P>?%IrL)||TldZ`-xN#K>%K-pPzudxN&aCU z|BQIye`1Q=#M-$c7EB)Gh7<*OQuf_C={unkRWnlrNLmFl&M(jx8fDiGyYZbPtoqQ_ zY=MdcG!8DwEoS+OP(@~E?^Hq@>i1+Eeoo&Xb*6Px{v~^}u^z-lf8k~j|NOTLY?nLq z^<&oQQLk$LcE#SSmGZ7B*2%QHYNVoQ_OFWn%7{)KTUp^Wx(>}%X?&gVdF9gkj_Gd0 zeffWh&-vT2Y&^b=UFDz0v%bF!QykZ+*j1126U9)Xh!u&wuMWjxqoYlA&(B>3Eombd z3ZbdUj!zYc-TOY(e|tZqBqBFoUeky^un)YY-|$*!PbbX16oJ4<#mmR`6km)62?!NP z@9rPGtvOMAOZK(gTY0<6!)wo-UuvP*<~PIQ-5s4V)1^NdCpFcL%4CaO}#Gt>P^D_A|rm8y+N`k=+(l#?^a0d zE?gy|730;b7P3ZO?M0Qw`6{_1QD!B}uIF`%y}GX>zd-+zU+(#$@EtP~EZ=V)Bk|29 zF;#Nv((=z8WKvI7dnO!wIa|uSXl?Un8RNP-ONL}{om5^uqtqwkCaNjf3JNh18Wjr#C76d@;FR^J|u9M%FS9ga|*kVn|1AxA?L-kk2puxaqRve45eX-4PZ%l75QW zYvd+ibMa|jjp>fqz$59OzU1wg>I^vBZ^n{rqL{Avya?#uZ)xa2CN@Ag!Tgm(bY?HQ 
zCrH>BE}ZG8!FWGS!7)o{oV@kO$KKGwy)2+`x6NkCaRKRJRmwbvyL^?3{ChufR7P0b zeYM7~J!7oDONsJOxJ5oGFQK_n#4Vf<^(^VyWmpv1b2z>OTwW zwsTPzTTdN5vV3`K8dq_Oof%;zpry-AtG-`0tq@X~y5fMN!Lo<$(s}qIAayAcqcn{Ad!02y@0i6(ci@n&Ru z*uXbsNt5RV``=hxM?ZKoY)Vq8UvyDv#3=!Jxt8C)wZ_^_aAcL-$gpVrdz$EKAJ##H zFOXH%y0^dC7A-poLldeg`XA(0P#-#uqlbgKm=AIsb;16@(sNH@-=4u2%K)za^xw0V zL2~cMKZU0VZGZYF0d(@GUw!r=di(Lu)eCGttD?fIs>QdAlxwQ1BcR^APk_hpnX-V> z6*O|v0hKw5&u77Ot|Yc9n&S zK?*06|>8~_J{6+9|$5zEq zDE{<>P z<$=CQJ>U%7fLioszU78%poz{S|ILc5==>F5yiVh;NR(pCUX6G5aCVl1J5dD1{shan zy4WgNUn@D^VX558lkL+B)klkm5=WZQZPWY`u-;=J*%je3&>t-C;#}@AO!ysG*%*yg9k}soOsl#_Idgd=uOt@P_?m z*o{6yrK9GAgkzp5kNVcFTW1U3>?ZV5rpoL$XDIWOea7-qdHwnBsjKhh7Pa>@#e!d^ zqnl_MN{$?mm@hrc_izOFcb3WsJ}Xz}o`yNH-&!+*B~4G`kD}dqmaoN;5oB#`9pCwc z=DMhz%k@zG2-oMA1!(08@~-COa9gFKaw1Qz*SvrH@MX1d!(XC-2^vGqd3&%fGzIiB z4Rv+D6zAtbi&9XKKsNK!17}?gQonsxRZ-78%})ehWWPL@G>#c~IM6FE3#+rdyZTIy zo^!rz=PtCqaDa}wh?DiO@cl&gSEaDhLm;NSynJ-2e0zbCaer@PWNZoS%=Ht$%(=PR zlh-(g+olgKW*VV9`x4%3NQx!Y=cg{-<#P>8U`Wcax*};bd<)vr7h`GXl-V2mRf%8= z7cK6I!9b6e6Fx8nOR*R<#-y<$(gqS?kW~tChTA=vq*edzOMWS+jT&3GUcr%~K=gSa zAY+T!K6bp~D*s4Nw)f+0!ki^9?LtRUJn&TU2kq))TbqXLI|ex@|CIc(^54RVOGr4B z4cEh_zx3tWe;;?q0mRKB&?(6&Y8=z__Ff{&NR|*`E8T=W{-mEQc|whaS3NH{epU(H z|9bbN=?k8BfOd2Tro0i2$4X945~0kg?4hrOibT%GRi6_$TtEHvr5|IL_>QDKwuGHk zL_F97&Y3gEXCi4u$?Tp=OPk-rA-El8=nhuo0hprssNSe>MWa-;X-mut9(2B&)_IXO z_j>!bji;t8y7Z_6$6p}P{oR!pBE6ZPw<00fdeZhKICoSE&uo&rVH)P78K}Si+gB^!T!7!>Z*&ow>L8#`^u`rPwN7udW-YtAUA zudkH-+KFUfT`Nc03Rp>fOs^KptnH$J!dc?$U^|DoY$v7$4ZRI&KVuVPdrr>K zG?~PwRH8#-Jry-_{ronm^g_tL14da;}c;;aEiuw(&WE z3pQCe1SF!(Pm$e@Q5=F(qqik$=3kJN+WYQ)QJsOmCIrO6i&LF{uXs%OoUf){_1Rfx2KRGtH|mTI)3Grqa~L_t7e5V=a8aI;h7w;BjPeRk#X{oZp9etWZ&p z-Tpv5P$NNuE_etQb(cyZ_QZU>r8c$eEz05)$^PY$`8cl4m&5tB$$5)AxF_>Qx~I2e(hMi^G0%>VoJ`AkL7B8($mpI1oIxiJ`)NpdM5~jTO3#7 z{mZb!IBRC8ytc`g&C#3OudkzXO$$XMXp*K~IAA@77Qu%pFTs>=W6HZQWpUy(s=32l z-&8p>n?(FM=7y6eP1;?ZxRy#Xm;!|qeBY3AnUwj(D|%`<;vaRZzogei&LafldC)UT9D zl#YdBH{mqGZ)zvT`uR63_!~({+A^Q+-^SMre6?U4jV8Eqa5cXO7f=WpbiZl-=W)ED 
zm-!8X5@2T$=k_T!VfZx5B%zM?pB@gRx(OC6aBlM~GDaiggwx9#BZ^$S1?0D5!`+=K zwdV?vX~e=Q!KdG`ZBQq{KfJc(dnJb#6+$Yj1B6au(w0}aa@~QM&J9|m)KnT8S?hgeBcbbPQ#=8h{!C2+UB%X(Q$EVgok`IN7X=6TU)y}g`i_(!&#x z5v0~5IV~e`OB9T%(b=%DSP1hsp~&GSI<|FSP3(E<#oSXCcac1m$_$UDu`XTiDY&x@ zMj!DNuy+FR2bZ;oT#ncZ;`u7eyDn?v8TBKi zp$UGN`WPqKkAG=M&;9s~_8&w@KfRm9$NlRMLtf~oe@w81e){!q1!rF8uted*EZ$WR z?j>7`UYU0gIqkv?&Mh-4d5YuU?3pu{ai=mrYrpD}J@p7>rNwu4Z($cqP_g1FFK59o z-Z%ki3e8mfx5pN5Z$MyHjAu|KH?ioEaiIQe72?Y7l%wv3>+p6i=Su=K8VMM%yQ2|g zG{FNajk(uybEfwSi2$QAxP7pW1`Q~ev=RN{ghjcH)WsA$9O&EY)Lj!uJXQQlGivAR z|9Pl67Y?k1pZJ0}4sRAMHY-_=rl*(hzTOjAb~;@Pt$1%@&kb5YGQf&8@)b)F{wnB^ zyD6xMTW}4k^O0bvlgh7`DR7j!HzEG4hdBOCtG8&d=8s#l$mr>=%11|+*PPu#;&9@9 z9{Thk5z8@ozCb6OP_jQsdySgf+MDo8mp5EF(~jc9tq$cHPB9<;AL`yb9P74wAH5rt zG-#40Lq$o3(j*yDDK{mv%o-%3lzA@Fq*BHvL}Vs1j}HxsBy$=_CEOC3XZu{9_xpZ- z`#AQ0``CYM$NN0T+Y`6Xa9wMi>pai3E*uMNhnt|TC@{ntU5)v6m}Pq!9f~wR6fY!B z`U(<~p>+`(t~@c|;lp@Pj=Dcee3J(TPoVrG#5ncwEiJJ)q9_M@0BBOZoo3Ox;9H$o zjrqCABvf1TAia?HTlh;xpP5<`W|lrVa7!J@2s^OKq_k$tniunTs4m$! 
znJyS?v?gedz7FF_J&J|pJDU>dyFZPJq5T0QNFg;Tf<{4io#MHI;ivDVIz*`E#qi!F z+eVZv382PUcwBWTU;0oIn}b#!>8n1n{rvZr5}A|uK3l+G`f+L@r#46I233uxhbocO zqA~J#Kd9;W&1OjFDsy7%v|bsxZL>>h!Sk14x0{rrwH&w-XNhVA zAHKJF&FPQNj(tGN_=Y8@G@9~MC1j-{hsv&&0Hx%x&BylgZ@jbA;KEzMA{;`hQBQRU zFfA%CuG`5tWqu0nQYI?SLj^V)b@g04*07v0oLkS?C^Tdnacr^2@Au9*nj5GZ_!MZ&y-g0%Y0hPa3adB~t9qQsaG;DQ``exJ8 zfSMLTLBS%i+~128IS%;p@&p6~L?*d>zrMk9*8G(-v4q&&LB=Vy5_@1;@D!7IF;;jM zVQ(xOFO1?$dS|z4=h=A}sa*b8!ehL{cm}g*d%Xep!l`G4S_V%G2#xXblCLGAMHYg= zJ{-zYY!k{z7fvuTcvpdrvK)36bpUWPX1 z+iM#$!DbMG34HkQVU8ux=_kB^bnE85IN+wRY7|)0s#J-YET!Wwhl)IY2dLFcZaE{_))*e6TUJTRV5r@+wi7FYu_6HMRRa) z)Zkc1Y~AGIeuU-Y>4QyYlJjd&J8}ergB$RR1e+FHY=@0fo$THp&k+nL@zyVL#nw}i zvYUEPx%&x(SQpPB&d+=F5$ZY9vpS!&^{t{1zjKRQcy`-sg2mj9Q*{m9hg(L#jl_+O zl=5W?V0h_MI~Tgqi;#4*XaVrk#7@RPF$j$ZcYd|?rxT3vEcao6bWHz=F_f-;k+hy& zaxqIl+u6yn5EmXTHrH79LzFT%;=kq)m;0&XYHDifTVL#bjPvBR-iu9gc;@ih#JCLp z32cl;!D4~a!RnF>DS4nlYV7YproPcFwfo&YQ^Rkybmqwnee)vf@N9O=GNOrOoclhk zFnbgps}Zg36o$xzk|+D#)P+NVd(6j<3W@RkgAgik2BRURT4gs;7B}jRToV7}^}g4W zJAe{KkO}!AVZc7B=|;->=1nu-Oa8cNac6+!;KWRG#>$&va(Tk1Z&z9BTmI_#NbT++p<1pjfroT|+Ku9cg#9o8QS zO~A3gN0G2JurzFmZDDNzn<&H-7885_@jPCt7S0^11?Dnp*I)beOnhR7pMV>V?F=+3 zywFm5^W2!YLDsXXG99p3D1_oc*2Zq=$I?rW6g|hId%r4qW^5uJNPB>WN+rtXwaf>* zY@Q+{KLwUo7cUr!xGmlDMk15bI8ZHEr-&XIW9)$39=U~6XyD)N{%iq{R9kwzkp{+d z+m-M*obGF@OwXB|nm$3(G9>H|HJgd3+?R+l@~zHyajsB&jD4gPF4kQNtZ-sye~v6@&5b%VIC7s-W`P_ ztWC@+tV^RV-Ki^_{J?n9wA!VO1QZ+}1fgKshhw;@j{o+9K2`Pt?Ob_CO=AcXwFHkz|PxaBIi zLpiEo>EkJ*LpWez8B_z{+p=P#sdzs8bqDk@JiuoQq}uqu+EyQaAYppLzRAsjvN^Tl zQ2|D?@%!ui+G%vcwMFY}F__r`rHeMMo-T|EE+`J40R75VdkL_~KPDdaKNXq8fXmeW z>E;~YbBbi9JF<;<=@&1zp}K5z{4t;W0VCC66Oo9DI}%4jY~Jtk!^2o_`upgWc`G6y zb#_q62{hOTCXeOKZS(=dW42+azs$tYk5A^&Ar`KWfFkn27_CGBOxgueHE7}W7R&y* z7zOMJy*$VB`Lb72Q=O6vCvpG;mX<%-rGd#Ysf6%Qa~)o?K_$U@6h{UqfT2zSA+{Ph zMd^t2-^(;(e?E_tCOr%l$>z&?pzg|s^bj~@4p&cO2tcn;p!4%7;wdCl*J=Rag0 zrO_o4QX|G62ygqJ7uU;?DWmbNLk3v|QGvds75)dlJ6@eCCGb2b*^U6FQHU#X zljK5D*&w`e<3^hXV!EkX^rKbq`utIqApp#&=!e2U*mO$bdu-G;dyLANYfHe8GZ+>K 
zkn8(9KZB-7uL2NTIM^JJf6cq$H7z3 z7@a-<(bEsDGOmwWPhEKE6Ta>xhy?s9Q>s|he%NAH-D7@0LyJ5Buu`q!QF^GR;{-!Bui`v32rP@4X~uNMFNasN+J-hcm+!TA56 zQ2pKUxF&4R8yY7;tC-%^ncj zB04P@#+U?q# z2sD*rx_}2MoWvfX3fd$mXY4jQP>XW~?8-zlPvb()^Q{MvoGN%vD&uTa^~p)Z0&N^4 z0m$12Qwt5(plpKrd!5YFz)dYH@!rJp`0L6CWC<#sCWCm`psK+D$@PV|fC zkM)$H%FR!;pQn8YPmQERawUND(Q(R!bYubj*zf1dP5wlUK?T>vKiRW5c@qxYavVV* zQZ=S5N+|LWDN*fPpzl(}(WX$?Lomh=I?<8F(%8UKhOlU~%tdQ=+M!#Ndn z)cH6|r_fJ;2m#P_T0cwrpyr;zWAckcZFbT0@=U3}FPCUE1=Fua?(2KK>fC8aZrTfH zlI+}%U;XNWC8V25I?ofyQV;+Cy6Xh#A0PxM>(@bPSu*UpKN&fi4rH){lFtITt3#X) zsw|m&!8h@ko)50M4&q`W9bo`!nW&ekZG&?hM*5RI61g^MpR^VGl7iK14*s=;=m^Lw^i-~gH)etT1Gy>aikT!0@rLK_hC~* zl&!$^R$s2pWo*X0ddBq+kL71j)`MQ3fMQvcNl8roVY3<-;KQuR%+r-?s~W5|@PN0U z{=iQ%WvtGhD=jKjUjT;uB7qFH|5_)n2{mgI!8g+GrqC0dU7!{{P%w};U=5Sz?%4rW z5xegfiO{Dnp4aX9V22ecmhI3L%0C)pZ5X@1xM1HRY;M*cY*h%g&uJ_NZ73?&A-qR& z8@q1#vu~f^*}T68=xY2T>6}|qgfC8;K?mS4^@Co%@ZGh0ysui2NmN1qhkcx{?BBgf zKt+B86OgFBC9WKj*xsqay$Kc{@@P=WuBEEFu<6ot#v$PLJrO@!TFgPTK!0#8#&R+I zHA0)5`Fyel0K6~?M57#t1_;>%4Y0qV zi7Iu(ecG&8EA6z+d0l>o1Kr2q)ZdjTcX9e?bONiqK{I9`#dgvT|?SZ=-x42OPc)p`}51I%qM45vH ziY*Q^G%e`{em?`6gCtqvB0-emGmR*4^+Dv*aW2ep5K7HC;I(MkUqS>)rOL`SX6P~APudc` zXA^O<;(c!jvB#MjB&QHJ0susZ^xN1r`@H@I);V+(B1e|m;sPMs@oagpm z?R4nH(no+^*@V!T{(2NosZ|QBZaUbeSnOpB$UgL)hE6Sk!uYcJ+}*l&C`J63it;2( zFvnS4+1)=@f|$?Y$c+EpS_<9k32pBFd~(2bg0j_`IOJb7nJY#`9Ly*?qWXt-0g+ss z6#alBd9H)sRt|Z|E9dU&{kah+9_w5UNi)t(UUA{f|FGS*Aip)U-0`eNF#YT)nJYkx zV%pkuqG{lG(j4t->TU`rB8*BPU;`?iaCoim{nocM7>J5UfNmm*b*?VpQLpGk(0fo< zm?Ag&dn?&)NYW!S4&0?ISX=TT3P%8HMBR=OZfILeeSv=I^h_MjF@upUQIz5T*ZMT(kuFRDON6!uwA z$CLU7{M0M97f*Z#lltL9_&$;|=!MI}J(wPn-I*0EZ=?$zMs5n(vJwTKKZOCn` z9MC?qXoBOogNllZb@kz$T8`koe?^_`h23d3@x%gbxZb(sNww{2lyQWa%u6LiiVbW3 z@Yf7vfmmZq-8sbi3qG6g5tc*jXG&YAk@Aq}ylPBADqVm@;|fQ~h1V<0KwSf%%SeJ` z75?G@*-?JsOybzVE>Dg(O|lh^wTRDP$|Ev+A|OPcN<%gTSHtNU5fNbm3Kf?F;4F{F zq>w#Qe= z{QzAKa6irTS31?7X6rUCmpG~DJzCrE!q0@O@B?O!fLbec}&f($Jp;M44m zH80$2_5n|lEG+VPTddU5iM;v*-6gBxc{44z0st784dm0wy_B0|`U!3&dW$ 
z&TeP-xT4cyE8b;vS4`s2NOru1u#w(#cuDuW zqOl1xN=vd@g7izr+^X<$9s`fylOeu&qDGRm8}**cJqzhrX?*+xOidh3TfqjNrs(MM z1WS?YY^HT{am|mjjd{DW+9G^}#|$g+6ly%WP@fbbGHi04-#%feqX*&&dQH>BK&JEf z3IG~`2CQ(6$$@#J0m!_wkeTQycmn(cW{*cG_^3}7xZnlFyMCc)6>1agAtvQ9G4>d? zc+=3};4W^Q-aV$ow=-`7(Bp`VxY)S#j-|3@BI85Y5SzCC@KY9x?HLLKqL8S#3!!Nd zuuIg0o8idcP5y~u-+@_FWJu?&%BcXMdoQPgo>k5mfY*EhwvgQp3lKUqTgHJpcojLI z%4jy{cubBqnTDQe{q-xT{hnTi<%&F%5L|zBMs|(k$*5NxdC~f`x%W@^@K0^~R$-2yQPWJCroGP+()GSjgeifXcCd632S;U~l;(iaS*b(QuNBsj-pvRGsk=ldvJs<22t-?;HFiobXkAhGFfWX|rciNc;df z@*50A3wE4qT#b@#*AVKrCFCx!{mY*NFvZP5 z()G(Jc>I_@3#El8sPBk(PC(K6DZ6xQAO;1h8NAxp7H1&=aGqDzkX?F z3#o^6&m2d&rrl!28Zp!V>fef8ETpxXh=3xnk6!#AFQoQ0n1vm7ONKtP=_u~={^*r5 zY2ZP#Zq#~DLoq0^P<99mvlq~S=yJG8z3+oPwBX`2$9Pk9WHT6T#V>W5|@!%!Z z2C27~{@lCQ(Gu?|sb>d`XJmD_03deAZ2ep;H;KA?Hd03s5c#SE!`vP0o}xZ8qPPu1 zoqpZt7@TaCo4h|VF$|VDU_>_+A%vFXyaiAeG}W{wM)Pg6gWqq_&Rn`^ro1(r*KAcm zuQG728~XEekqy5At|$hcr<-S)9Txgp*rBu=jt2xVR0>Bs&!c92Y%vL#i)0AuzRZAE ztW0}gto7Qu+cAHej*;D-es> zY74AUF0=SxVpup+UR7OvaUQm*{NSIjWsmNOi9a-aiNVKE!r`L~8*n2GTr;P-%7<4D zl$1sGNf-_S8yyfZV_T1*LWXG({?o-U#qiS^kei+R+Dwot<(W(-eKfg(ixAw^H(V<# z>=tyLcn}=Sz0LCD=H;@kjus70xW6JOY={I+HbDrVGl3=9ld`_wc>L_y=6*Zx4LO2r zV7&Ht{|J+DvA2-_4!ivvidi+iUoe|_X5o0}IgPUEQ7(n5Zc-27)mmR`7vLVc6Jh_` zy%V?Rtb_zTA{*biw-^Q+b>l%8Ts@&I*Rm$=>nXAaH;d=AMQ8>;w{5Sg>CGh}J@d;2 zWy;ClzG0e+NK-bflaBCas2|ynz3~n~2o;0o@JAF)2eErl!;~5rAuL{6n(C-OQqy^KMaD{u#`6gkMrr^n|3X9Tti)n@+&0g=c#j7@mGmGP zKUFCXa!+?Or+~d%UP#(F@einpFk8(DseARGCPmpZYLY6`gGZ1 zbOYR#qB1SM^fLy1O55dv+)GY>pf=L#WawU-v7f^;)ng)%>-58&=T!2&6o(!3bW@Hi zu()=RV>imT5cgq%)(s8;Hb~@V_d6uA@1VOa?=ivv0oiy16PW%KSb?G~KJ~VfNt;An z73z07L+YEw(|s-7-8Gl>5C@xfj`Ggy*r9cZ=g)k&f%vf zz+fkxi#@3g3Rv8Ncb2ZkS)}PX#zTLOqc+}TGkokS&oV{*#`NbBzS2F4aT}(txCa95 zY!Cs4{2M9J#65CBbuin{6RbtnyOt>95O=Vp+D?;IAtqWC2g&v+45-f588?!}&#vCp zqur2uRy#W9{V#;r_yMOzfEZ77uG9`^pG55wH+Oz+7Hy?01RO8uk2$3dR_vx0Yvk3f zkuG&c`n|<3-d*|yiJj|gFD7qBJNwZ<*5dwVx81`v`S@n zXPU5$kw44*K;-Cn?#Z`)HM^xW2Ohbhc#wB@cTWtRJ>#PCN_NI#li{A0DVhdcVYIAI 
z2TDzC@IPrN6=i^Y!GVe8CX|-2%W?lj?yUX(%4bu4d6)a(ZnRECye~mB!4bj<)T9gu zVAO7vl*1B+W6c7=*cmn);rzo$=yoXEYbBBP+*$R>#@=PK!45-aV^#s^Bsb=}=wy}S z!LK(jGy5`*(mL_HwmGpJRE~lIjg~D-k_a}qz>Eqrc7RPjhKRllSCtdy&8^xvFypQi zuC!Vz^pHDe%}*GZH4x8h!OfHUfUs1Hg%yd&W8gQ%MS)F9cmIL}PrZ5?3uivS&I!4l zDAjD%PX~>E!hYRk(3EPbe2Z5`&u^O@OOt3$b2QH@dYG(ne6~rWw5I`>Nta7LZ=~#10aYLg^qN~D2k2ZvzF|F6?*S)J{ zPV5G*4`%RjOJyCzif!nw;RxOdK7nd}fp#oVq2AuVZ#RV1;8N3ZX_4;k!SyiU&|G@I zxQWKH%f!#4F-HoG59z=oTFwv2l<&Sw+fD@K#s(&66hDh#9TXa46{LOw0T6=`9b&I{ zzXPJ~Ok&A!NP?Yr>k|MFHS9iGJ?OT{F8)EC1tTEp0)vC2QMMPnvuo~+CjT>HMIdiI zcnsp%xVX65AtlB2?ZBr#fUxvnpkA+0?I4dJ(;0LOao9C2pek`60RqIcp()SMd-M+q zBdO-gON8CNSUN0H4i*a(0J|vLIjT50nrY)WsXEj;0oR?*$XGjaw_jRIMI@@t*s>0cr$0j{@LMRZ_8kbH+l$)B+9jax7&m|q}Up*la2Wulh`T_jUA}t&hU=$Cf%Z_9YtzQW zb`GiOEc=YCrM}^J{E|>b5xRd^&vc++;rqjs&S{Kv7^61w?$~Mh@iD17vC*Mbs4@aAxbUjH*A}QBkJ? zgTQxV|F;9i#P$>r$&44!kA4`U^58m8=u3$kl}Yp=t6k_FI%-<0QOxy%KDs5Qs&MuE z^=PfAM+g>(t(tx?ZZyowj$}nPdH&d6$sGtt^+56ISf84U2FLCGqNtq_0-6N2c8+u+eu^u$= zotz|WU@5piw!4ox%HX|AbtF75{jyW~x(xa)h$6Iel9!ifHC-a?9W}QqylV1nLc&4C z*5PVI|9<#M4kzdJ+(2)=J_SZD6%o<_;HFd#c{T|InC3csff+Zjs~W7By0MwLn*E~t z*6+5I#9;4%(u<-W6XMrSR<>c7!Xtrr*g|@;M%ik}Z zEJil{DKoxXe_*KFfNnm@qY)&nGa-H>;NPd!|0rs!hDL>M?T`8vv;F4aB}2h|(E%c4C5G6X_3k`D7rzXV zi+^30UGydBs@?99hKwi-8JN0s+a&c;A<>MDp7Mjk)H?qrIyRXZfEndIiXcVGQ*-A`7Yk3x2FgCa+vTUj=6K;+m65$ z*LMslRT$;inzIUX9lDHK+{(&+4(>Tji`grMl`1 zC(>amx>aL}%%w&sd&3(aA0OHZU>-Xnva%%m1>$qM%BzwkV_7cGC8EL}>tsZU)s9zW zNJaT>2}wSKJqtBaW!!SbZuBzYG=gDxN^7s&oxKwBZ|?B(L!IcB7fHV^<$ zwI5qfb(kHNDsBoJ8ygdzG>ClQ!TTz_ui`co{`&UsWdK0=W2GpIe>uzvx-xuh$3SCdNZM#UhZ?g@uG-x>aUxpHo=Zlf3O*UBZ0| z(ex%bW%^X_dtb)7Ge_oyu3*D@#Sz}!0hj6tJ!7*h0YjtFfzH~;^5y4-9;pOtPP}w? zyopd}H}X8@h?&0%_t6n2BR3olLBrCS=kS<6Itl$W7`^ zRp!Pf`KMc-*k0AGwi4DKiRq}gRt0j32PzY6u^Xr%;sNa4EFQt)KTP#cU5nWnNzLn4 zG42u2t4T&`K#Y~Gl7nfPBl(k9emT;#9-F-Ck7PIkwWOZs+!_~p<+hr59AZCW@c`%y zGwOP!-b{GhV&(k1-#QO*^dE}(W4I;G|83W$PjAZ0YpJ`BdZvfEo2dg2t=vOOVXl)f zQB=V-7>-Ci{T~-z>7Vs~ORueM3pqYN%s9x@~B{^rQL5mul$;-9? 
z^)W0Uij?F~R5+uEvgeY0%|B)|3iCo}-JtH#kw9*T*M_8KYra**S4*dov5w)T)AB?4 z4#2KY`}3({PT(s;U=FHFfwv*CswDRC0;R&~aT(`y=fX2(3nSbGmeyBf!VI`FgvSU+ z30gU2dSapFy&IoBZ~5atUvoFHPp1xO6~hV2RM;lq3pBhP*oDTW#}V!0y3f5NC#DC? ze5Be+^;dd8(S!R&<^GZu_5R7TU((F|Rb+=x(g_T-iHyMF0NWnMq=PiXp9lokK&Mrs zaiQl5yNl;iGkY3UD&W;i%geP8ozoHcqGg>U@&J{ZH;u~yxu1a5pbhKECdNNIVJ}k5t2)OWQl1&3a&X9Z@^s6k7OK|)C?7O(Orb!n}B-(fh(qh*bg z;wof(G}ufl>wB7oq1f`yp0{~nmC&)$fjWq z)fY&2Z2Wv@tJ=_xaOktzf#=SwEReAThe^{+YFK{>iof#Vt{>LXI=An^Z3url!)Aj} zYN8sCON?y|m>A3-q*X^I=>B)M1!h(GhK?c#`5tRWhc2TGW2)cJ+`z`*Um{_ro|Z@a z5E=Eq3uG-d)ERv*?{-v`sY?qmBiN_sMSkRqqe{}&5 zS+hDKe8iOUrSfp|Aj_VVx*CmA`xKOK6+aT`wTDCo6Ei+3QJ?t$P?=IwXnx<@KZt*5 zR@Z?}F3^r;k^Hx*hq}B(Grz1gFWdbFC0bfBGNjm?Ne39&YLP>=n`N5|C%HxS0$9%t zmhk?7V7ndMf?iYM)S67pX?243aw>{X@D4Ld?o?D?$&M?-NM+1GacxJqzEYhn&kK)l zjfnHDWp}xI==9f|bPDBD#TqAEj5e@Xr@*iDpp=&}gvs@c>p1SyCgeitM!!h?v)Kyd z4Hy{E%Jd|6dqvSp*B2nj_B?V%0Z!~#ripjSE$!Kf@jkXi2d%< z7#&#Z*9Ursg&x8~9kNNToCe@S$h<-LtGRasLn%aKdzv(uu@!;wN<#}~M#*(0?Lgn5 z08^hMrqt+s0fbaCO!?VjFwE22I}On{cPCQwlGgp4u?B5&=uxY-05h}E`ApP?M7i-k z4H={vzX0dGLRQ1FNq`Qeaw3;4tZc@iGN~P~>tkcNI(zen0pE9OMJN zrP+U9ui7M<2f5;v=3mv3!4kHqlWtHbQ&IsF;ecUZ@V+{8m606NQNjn?>Zfxpx(&%A zdB-onPdzot`27(D8Nc3(+Vh4;(qOBUG);xcZEY}^>zoqzXF#U0zT z4zT3K;lIPkQiWiu-}%EkStLHWd(;+9%l+;m3`U%Kn14hRMlPKR3|-m@k3DM#a)?sM z((-}7{0-D_N1seh*A%#s`Gh?>(F6Pm_B%l>*Wk-AD#`W-^;5Tu`i2xxrLDq8zK6oP zA6p_knKA{+H9r1!X*xtpkw>{R7?0|^*)5&khT2X!rhx`f`p z*-3psqFonPjDTzCL#7aXvh{_pl_R(|4a3ivzoL3<8VAF*7@f(d%a$af#YGRy#wQe6 zOpIa6)Td6Iy=ax9SC)S0;JK>sENAU zQAyM|`ALLtyF~!7W54Fz0gzQ47qn~WW%w|9?8zo*HDm=vICv6WmB6cp*`BprQ8>bvrbq_FgQ zLquTqH_y2>&&`g|qB*$w{ADy;J{YkXrzX*l$F7>=M}goHtCzHNU!TO!9BsdTF7}T{ zMlJ0R+V?c^iRQ?F$lA)(5A7~QUoR1&Q>G6TvhVbNjN^5F!Yi}It6{;8y0KTu8|yYY zqYhNQ(XW|pP}wnIz?jf4K$XyPtxDmyz zxk1as4$u-KGC?C*qABbA6SM#~Yc9iSsTY-uWiM8=8nMMT+Bli={gnWs zwrFy9#3d_=UNk_9_6Wgo;ex3tRdzLM)VWWdEn>3LWd|f_AQQKN^`$@H!fJNUBejKQ z*z{u!E2?|1&LD5g`OVZ=MTI38c9+=I!C2XV)ilE)iHciCZr^T~UtmLNdR3fsMz=lM 
zzm5l<+x014N1V>q^UDaxM#OZ!uRvkU98?nJt=e3NbK`OYe|AgXh_jk8B$NB_Yrx^I znTqU}#U3v;4_{}vXxgF=`+sWfzdn6r?K1XnJ~6KR3YLm@*H=Xyy%Qn7d7C_w|FX*7 zIn%2A6MM%OZ9SE`F_-yfy*0EC?sxZX-b+8OU z7HJif<_PH>ZLV0)#;xG+`{O0FEIr2GR^@p5M0nF#%^y=u=VHmNH#Arqcg^#qCz^Cob^ZeBi5Jp}pk8$RVA07J^jw~HSRK9>+7?L2Xr zSw*Mt!#U7n;u}>!1M8FeGk>7xYUZbP@ z+wb27N#lUT(?H<+fIRsIyOR8Y16R_LzU{mObxBFX;t&Q*)fK?Ii}tLo7sr~05qP=V z$9CatH0OSnU4?sY%mR+W1M~P~SK~t1?o7(`V$BH;Y>$WlQR@XJc3*P6^4tDLNtdo! zNZ-BICMy9m@uG`ILxVGih8$DCu&TRw?_J~UG|3pCELW%dmY z1|L_A)r`o^%}tZW08(||jySFO$eaZW793Yqy|aAvY7tbI%s7p@6O*7#eZWMj^Bg>Q zu(G`Tu>B*zSCifo>uwCU<|ij7v+o&0uvSjc2|bYu&T-L2{Ir2(7@T2=G1~D_182{k zt=huiuZz_~OS_C@|CTeq!I=0zLoyP#H)B(N3y6;X4_8d}D z@=H(um3<~LRK`W=^LaZvE-x>ySKq#woSzsQPIU%bvxH6l4$Ilm@y$RlHq&B%fB!PB zea-d1Qzvc+yUtT`mWB)NrL5b?oA=|M^=lasnVFf6=o!86>MSQ)i3j%1aWK14ZasZ{ z0W#Vs(a&CYa|L0Z`XjpX^!{__&Q%r_$=8)&nVWXbo8|QpQjK|jbab@s`=zoKSm2vv zJeG2Jw{>-i*XBC+$7xnr?(hL12>ty9?8D;x{CtHd^^HC)2lk$W51kS z;qj*3*RNd@R<5kLR#%|IMMbSB#OYts)`eN*7SY%H)flU?oIq=+1Pd30~G zH65GlG2718b}2@`p3n-z?hklXM@PzWp<83qSv!0ZAtp7ay=Lsmo!h+;m2#vdL0iL*o?`u~OcLo;Ldy z&LkN$o#l}8{pRv5#orAs*x98^?&RX|mV_3vp-fg*cJsq> zYdgDFm{)QhgAXsv_dPooul9FSAs=S09zZA5w~h{<-@h%fnx3HCC$~l9vGTd`3tIQ= z*~37e!3@R_ggOw-^BKc7U-)F4m%@BC9pI6TVQp*s604aW`!VdEsDa&BD+zk(9F(S> zKi`^UTzDIT&bo~o-?Bb}s=p1IdjG+LPoR?^(0pjOje*$;?9tV%tgH)PQ>Oh48)REI zd+EHvqmO!cKUa}C=$enu3$VVIQAq8Ju9lBd562I~P%#VsU(+pJu4vpSz)X}wk0ckt zBh+{Zq=m`~w**ZcZx9i5#c0A3u?oQ4%5Z(bA9BkXwC32ak*zsm-sBgSee zTg426zUNAnEfM`zQK~}EF0?3_FK>fA-u#{`zei;3<;yb}!(C0t$(J)T9lSG`+Yc7C zvH$t=#}Vd>1dCo&1I(td8!Df_VZ8!TD3_t3M5H%&Ke}!)A8#5Mlm(;!PM$#;u7EDY`T)S~&ueJ5F1jFVAHhG&0f=}7a;p;4B^(_c`9($ND@84ac2m?=`6T63`AYM? 
z?CD(N<*z?~UNxJO_a&e&e8(q*mU6^s3iow>=7vc08Z%nU@~L;J7&eS8Sjc_98yB6Uo~O~ zYa_2)pBgNP%V3lH(9xe|*RhORawy8IJ9t~fe*b5bbMt|OU!ou>d&Ih;xbT(w`a6Pa z*RG9Q%PnTG2eot5O#iuRY9>gzKtEmIDLXk64|max+uCrq3FwPSU3E15dTR%=Py;&?v)X2n@VRkCIM@oo#$C#` za>CA>VTpr%Zm8cW&Yv`eSge#{7c8Rd3Ag=p1`-}fA2zeb2l_J3@o^M&Ei=?f(ZQ?*%L#4pE*e4 z{#(>5eIhcxDX2N$$!k?EacfwDz9O(5_ypWWsCu`{({h4 zmTSey$(dk~T@~SqeF2rsyL@!-{_US>Vc>d!38sS=N_CLbyah)i2Li;ly?gfxM-?JN z5pu2S!8{1nXcZpDaJ9F}z0LA^(berDPZv<5?G}t^0NBz?mwDcB6N??dgWZ$C&CP9) zWewgqIQS}3nsNsOes8pG=$zz^U3rYk#zk7I5Syr~fw1X|)!wjs_1d^1R7aN~3bG*` zUnwjs%zv?F?b=i&1+g32OT}+gc_kpSrM6&Y1?CuJO`uI-GXepUihQE>3FO?=96;%h zS?AZ~<>xDZnZPa=DMiI|%4%r8)-TBJ_hejlkVzE5puISppZ(g+n+d8-SiP^{RiPU^ zy=~>t@NhV$jG^UmUu>tX6c60lK7cVrFJ8=e{~lxZv$mkk{2B^9O`)f8JP=X1VX*KXWMu8v>SBK)}-2FAWVV2%R8wcs_pocyXp1khG7$EXELE z*k+S|7}8*LcXZCJ%*W_B5%UG3rL@BHKqmkusW*TZq!RZBR271&?T z5;R5?V~)?smN|3gC@L!c1J)|#Oubmchu&J46L&>V=mH0SO?e`A9EeVsf897F&!8pm_@{88v=%R=E2(82Nn4d zJg2_mW(i+VON&s-6&gXIr}@eL+HX^giA&AR9_(roPehB{3eY>p%$&-y*v!PZORmm$&*9f8G5QbAvUWN^tX6LHL z_H?~EeB?+8>?L;|JkZ;i(zpqW!At1I=9RZ3Jtm}~7q9P}QJL=-6trI7{=x-6(cQau zmjzWz)5kj@w zu=w@@@KhGKxk!V2O10bV@F$=SUW7)LEIGj2CxM<20@%>^ehoH*Z$KJ%>JHzRU@Isn zsOo`s{mimK!dy38^`D}mX>bsUz?UODx=Csh{)@s}NFK@-nJ8k(7+&Swc=P5w@)oD& zzP|pO+JvcD;QZh33O7uEv0E-M`*F^nQ$>IYMToTh_`&{z>{pV z&+A7`Zg3%T-Mee_Gtu`q23m#DbM>dFYs%9spNR1A@YCoN`O}!c$#5y|**ILa zdt!ZRAy(+R9Xm2U+M&v9$yt>Hh;tO23t|I10%2O(w)SB|6AX{IhspB^32i)2PCZ`% zX~!+)vygPuM-=TRUuYauPzZ<6irS<*vSo?@k>#35#o#qi#H3H|HEh~^B`EMAjit#ax_ z#U4>Q@NX18DKY(PCe&mheGp`N6uNaZD=Xe#Z0S2`J2}O8{%5IeFZPMp9|c zqQCxtJ%65N1m?@&o?s3i_{q|s88c>>mQ9;BEw#n-*jGs6cOE@5IG$Ve3G^F9%~;I> zc&BKt_O)BL0tMUt6D~7FbN6XJK~s}_hJo$Mpop;H8+FOo0I{ejUDkqGhCCR*@o?%* zdOC`4m=?}4mz{kN3Z`wr9RImBt*i?z90mpk)~It`uI~NTH3P3(Grz6h`O$6%6+L^D zXyCvOyBXx#&qp|^x?pX+8Q&ghsj%v%KVk-hqw0iRwkS39Nc*yFO;a zo!s|+gP$nST8gJ<(kI;j@A zyGcq){$si02U1N{+I&xbepGhCfpH%lfu7F~_n4YDl$6YdWBgU!_J`$Jj(;PzeZ8-j zc*VlE2ma#N=H7gs#11*X%?&FjrSsmmyLy47AqJXN1c0 zdE6183xn(nYh4YJ@e#X?_lr(Syse9hiefeiY6)}U 
z#jHzIW-{;kEn+-I)2OwzHT~qV9S`w8a8vXI**#5a-9O}m=b0_JckgzX@wQ@)tHGm? zbsG)kpN9MT`i86vf8g0VFK~9ns#LCu74(lVXXwt8@eeM%uLyKxZN6*k)+IzrUVyi5;i^?WsBIez8vzJjV?vLq z+mTS|6#yjuK@Odtyg|6UWf6mdOdT9Z?$9AjqB&FTzEI2i%^O96opCvA!R{D-cZm!d z!cFK)dKExg@ky#BzCgOrJ0*e;kY(Gxm_ckg;y~uoCNM){ zhXYelFy4gY-2!#$I44;R?vHEK#1<`^;PAwX0eq~4qLN@-$csYwx#9sK zUzDJDczcx&pzU%R2AGN_LW)~T7E$wEQc@2pN=DhEE&mHq@U0QTGy|fc2ikyXnS%I% zWl2r=GZ9vMCO-TH(xre^E-Sd`jX29|B=$6Yq+jEIW zQ;L77DXv|&ZqbSrx~KAiFHBJN?dZ=3k@k?~;_SQ@vT%(Q7`cqtBXPSVr||WEAB9#9 zUh|%XGuxB972e_0fo3o=K|#0xU*w1B3_x5IhXRQj@wLkd9gRWHE6t!e5$%&m3^MOQ z<%&XlQQEsnQ9(N~vSRp?m!Ew;b8OpQ~iiVgj!J^A)2BC{m1?WZF;T z8gWbT2dA3~^3%qUijUT|{!c`dh)|eo3zp#T-@mkPHP4j>Z0zpu4-A6&_;Po* z^~;wp84MyPP;ku$@JfxGEiNwZb9W(GoruC=P*jH|ux{J7rD%XD!AiV)_wFUIx66Q6 zHZA37tXJNzspcw3_4ogLMW~0!dGMyBq#$~}LZxuB1N>=}9sYy%AlRrQB1fO?$>|OY z3c3RRuGDW4NQ6tEI?6uH@w^D->M?pK)ElrJ{(_SH=k&Jej4xlmrnrsT>#lu)?7kV^ z8uMMk!bJYj`0O=)RVse>(MFe)o6+p42EVXAQB91OkI zHnHJa?QpP+U+U=h@85r~X13$GOk;pNwZDDYf+KXRCa?6lH`>x>^iLcoq(J1nHCwl8 z2wyuAb-ZAwZb@V7O@4OO_}Un)_+_hkc%Xe~6!^*tIZFS#Jl^mMkGKUC-kVya zYUzH+#F3E#_YSRhxq6%{O5MC`Va%13!CmWati|$7GRUTiJm^6HkpT3ix#&OWH^M@2 z`*yDN{&Ro`5&J)!-ezBTQGcaZ5rcss`piZ^Q{&_4qb)0MDXOR(Kw$zI@(QYw+?W4@ zUbG zDW~u+_tvTZAGZTi%&z_9!{-w)fBW-^|ff=`}{n+qrM6xi_n7${#NVN861)!sefE z{olNOTLg*nIwJgWklYIu{)V0l`|m~;?p}t#7PoA_+U56_Lj6a?SB7)nGOhdJy?X81 z=@+G+^PV$0dOe%@`NrV`LcbqOEcmtF>cuKqW@8V9*IHDfK*RPU1flJK=)ZjHsd+$T>}F|o%G|<(=DZlU;$ok{; z)X9!+S?y(u7q9J1-Wz`L>ebm~fT6Qi5CB9o!aPS5CZ|uIF6{Q-8yEH@Q9>>KG50OU zAH}6r9c{hh3uCKmqv{^6S~>03H&yc#tef%^i_7mhpPxVQblgfKSH|4qN2kx3?(?T# z$~-Kl1(W4EEJ0;0nCXE~%O?ULFJ2+mcf-+DRtg4Z5dc2P%3u+|!w~(6thfkCmpUr! 
zqEWk}oH0RZ#XfiL3m~Frj(_P1%-Epv=ou>_ODSrvipm0rSGR#L%jz)4bk^lsnMtL=Z0d?g3NS`?lrkG~z*3U&028b}-v~~b&BF1#@`}=DIzEu8q;q5;u zN{dM?{etkQ()Esar+_t%2@OwIvkA@#8@Y(wX{1m){RrzevFS8El3 zc3|MqmdLahO6TR}btBBbDFp#E+rhy>#3*k$p@zltDV8HK>kC+n)qqy28LDi6WFz>FRh)AQW0PbD0_9;?cSTQgZ)9=No!5GfbpvwOq~7_s=-ry~eyA9n-AhS(nt+=DQ| zK4(r5>f-r8c=4GK>nU-g1IZIKIM|0R;8hURVpY)b_6&lC0R+O8-{k}ZB5NGy)t_w__}itCr3u(K34|1F#plnT(LO!K3R^-k zEifwG4W0yl0F`T#iU37|a))mR2R*r_s1dHiQ%XZmuj=W!1NmS@K-$PtULVJ^ZTr~_ zi6bSZ1g@FJuD}Fv0%h9Xui)o_8wur&e)&Qhqo!{^_UE23C4{bAxwudbhBMhcUe4V* zVp#q>=d#gdlf#x#SKLD<*9FRYexd-+LLswJPL2y~#@3<{JSs)tst83!_5h02@9DAc zKYr8zA}E3z5#IOpd*(w=fyT=jM!1O>W-p1D6jK8+kjK|RFkTGuE8ai!yZ0n*9#?t^g9i_iD@8UuaFkY($zy2*3>zFXtN)83K_* z0PvFOlN&%_Hgy92!RUzJK;P zmCU|)`hWhtjmY9C6ae8D5=etXAqzGq-(5lp`~pfKCT{Ln^g#$asQcpTjcY}2IfMY@ zGU2sBR<@#5^2^O0aSA-*dsP$-AZX%rXi7YTeHkPvw~^bbhHItPplNVzmrsp!MFB%* zmbF+)0qDGi9UWmW+AqRuwq`TM@+gS5b>2O)_SC6U$R44NlBZTYw^aeEN#KhR0r%sK z0C*iw8b-wDkoL$^D_5^tkoC%yE5+kj)l~R+m*$Y@b+fS_+N8yo5IsRr(>ZvX^Xu2! z3Z$?&Wv7)cG7CT7_xg`Ef_Zx)Vz7uQd5skO3uyA zWp3F7STK^Xwt!?bw6s+A>uGcca*wD z0fFyROM+xcI)+QF_orh-@RkK2&b|&Sq?+5Um*zsKK>QSd2QbDIxfg88|Zr zKGZ+nxX)lYL>H7j=&y^u0@b;(tDR&&QH(cb{XMPPs296~+>#x(k{3Mh+1tOF^y$4} zP_wuCJoaL4W^SRw-<-u?=a#<~^^Z!7o~Cu%{=Q&~Ls+!ncF~J122NMUX;-ffd*`Bt zvjfyN5LH0-z-ee!uU<|et@=H;?m8U8gpbYn0D0Zk84=;KwCtc&Uscsj?)%S5B1KUg zk|TVT6=%+zk(@Mp3xKu=z%V%3Xg1yP{)fl<)yRec`luvd4umfHgIC9KlTo>>LixR7 z#foH`ExuZ7*Q^m~zbfOg9KW9#8>?eF8#ErYtG%JIku)EKa(O57Z&k1FPAcTw(#U;k z6bchx;b7+DO_vs8wef2v%t2I#R-5L{hs}EB$Ggf8zbR^R-4f#WGa)x%VV)954mcxu z4EmxFc9cQ^zf9Q>nQowd>y0h+psfDo_wO^yhAI;#&i_o%5jN;gHYV5~KAeHSj^>~a z4*?@9tHZOSCSXxzvHynRx~!Aks4f-*jMw&M+4uqfx3l!4+>FYf_KFhM~%ET)G+w0hTm#`;*hh&-AY6h z6_(_x0;4FHwlJ!GRjyT94M>9a0*w^UHzc)?sVO^a4Ez z8A<681s2+6izEBK(qqdf#5bs;IAH#LI6mqS)W*ocA(RKcsks-~gVl&IBtI%7Jba_9 zENA8K@l}KcB*M_vVRFQOdUK0sm*=NIMBpz$;*T*HceDmf>(^U2q+oMVz9#Sg5)?7^ z@^pM5NCS5uQ)VA{NE-rjx}u*fLS#M2TD~Xs^|mMl$)1Zr4$&l&xM>&{%=Lj+!v|hQ z!^pHp>%XmEBqaI%sd)$Bwx(S0`n4`TUZ*M~KwY&#Y@nx9eC>zTrOhV$!Od^Wyk4bI 
z>E5)z!)QcaZ6;Lt1cSQ5?RIQwOdK4Tx@csD5(e9771ZtG&_(9NLZ0LeWh@~&u$_k9 zytx@#0la*=gD_RG(AbUNK^=rN8KV0yOHO}dv?+epz3u0F}NR62xLLh;M><= z+P$5Biu5rm3gK!<1azsWwDjA-5NhX#NDgXv@uChSGto!E3cB%ana5^Kq1kG(8{#6c<1!>WC3*&>Mt&D5B+fO`R6G-+(Y=$Zz{2E=lK?MMe zA<RgEoc6s7o1`CV=0GD6|p!&xkUt0<-WIMS9 ziyMKv-S+!=luP~;8=}@OsdWfCGsj#&VX+QB=54u`@X`18&jbciNI?`48yglLuFkJ^ zXuFp5z1GD$rb=JeA-mX0r*mjCKAUw-&LvUa6w0w<$Gl^2c!H6kY)Gz1$lf7$I9u@Q zyFe+?{3kizgi-jLNm6?!iF2NCSZc0HFZ$U{QN+_dbiT`Xy>zxW%8^2 zUwrw@Mk^{REB*apc)tGn7wpcd&N~NY9Z%44%dvHqnh0!77}|UR6bYdDB|yWJ!NL$R zEjackwY6y=wn1GmqF5B-^4PzNTTA2mtm^)K8a*^CJuC4Ktl+kNs+Dma3zv8RE=u@j zn=@XY3Jkukay>ZQ?%g6NR`Ee_VnT2>cKY$j6L+f?zQo+! z#SXP@7K)}G5FRyXw&GApC}*_9SKLB%L=-+X=XjMtNC!d^){i@vC}vFs_%8~luA$Ax zf6644ovR4WneN0}f8$PWs!=bEWhm!Sc8MDj!H)4+UH}S&da`x;`(M(KwlFE4&HVM{ z--noPOg+YuiJn+>Sc0l$sTB~ugjdaYn<5xx0j`ibQ>d*{;SO2+ghM3D_Z0FH@Koi+ zyDoxZ=sGto!X*{6!m|>O%8Gd2Eo|aXO_O+V7*;HT6Vp1pfF>ztP4OZ-9!K7RP zE1VqS@j+A>a}|B;mF`?k(!pDEiZotzDaOh$yr5NYZTIcndriHir9eDhOJxHe=N!mC zkl!>64B=28xLj4ss741tR@t|Xl2xdIrdD1c<;5rYyE5`!i%%o({;uKmk9XITqpLWt z^3FEfcQL!g5@YVCT&B9{Sa&7mqs`Z(=f762q|Kn>Q?X0-VjLTlt6WXh-Hx`(S5xtf z`_C*IA2+8_9sJbe;~)1@st|0d_iY9aMrv#%H8^4o6RiK#{)U_njw`FEczX!V(mr}r zv?41&OVKSBROv za9flhjabfrg5^qw?eWXMs;h6$iDyh0XNy0%ky=+IEtEeatD%2Gq|WCUqkB@qEMj@_ z#S)>_G@)fw;#H;fuksnKyw53IqRm)IdvV~CCx6{_Ba;+;7Wq_x?eS`yv4_Qn3{>L| zpB*yT+q`Z?`0pWg>*dPxKWa>4W5?}vsXg2G+tW}$sV~qnMXS+B%3X4Z>FDzn8qbeY zG!WutmAPppp=f=(w=B(@63L|2*V#}S3y7c_?`a60raw564g+KV%wK_$4(4O3F zRWlZn)nI!>a_FaZfzu4fk__>MW>fDOE?)ijW38X_9Rk|KDXa|{2OT)U$&M>a(g*P^vbX2W&SbfJv-$} z3543)V9Iai$PRbfkjf5aA$^YmN7d<}X>swO0(bCe?N_6c zW)H8rv=)Cm`&jmOpKcEx&a>6{Horg6>fUrkHgGw+9XF+VQ zgRkuS6UOs1qYGvt4-Pyr>#F~pJ+!rvW1G3P*Q{(oQlG_1gSXrKQ=+(N zgOl^}^XJFu-5Z*UbNwA0EjH1AJ8RI}J#x5|=T%eyXWKW1hX5Y~5lM`dV}ZaDn>=+OpFsRKp=H*VG- z3MxNVu{&c$v};F0BiGcoGc`rFxRgJ-7fj=20w&J44bvBx)x5lOGhfSVLa^2Er)ih^ z$i8igo|hT+k52y@(y_C{yRxw^pSu_;&oE%8==WLf%kKwWU)V$$xFSd!CoyjnYd20zc%ZR-B5Pv-aXy$+XSvRIKCezU_hqTG1NLZ<*gz?8g1k 
zj!-YKELe#4w{8&pbn4@^Rl!mPXZnZD@PMz*j}ZZOkVK1_G8ji>PCC)g#^R;H#Fk>%aqQ)u{HZ1Nsr#OY?|*W4F%==uKPP5Yjl6N zxyTe4u$G=SF7;*29n~T)_$U5-H9T6}5)8k19$53Jd~8*8`l_d)a=R|!Y!WbCSTVxlL|Q!TNhHH+Og_|+DFrO+!TqFXjXqa zXRVi}@6-!BTv|RnwKJo;5_?J9PA19aONQfY(A)fk0mom)<+2^6JV!+Fx45*os|e3F zhz|PZUUiA$?Rqy})YOvL<2WpnSsGoPsDFx%e|LAv?%i9jaUMNtFE6zzHr9Phhy1qY znJG%o$Wiv-sHNA7#(&Z8@zhuL6p_#>%2*TV5-^c?(d)zw5Cbpb9 zdNZqI;=TLSc=4&D_sdPY!;MzCwoIwn<_rgD#m9wA_@pd(GyZX7%nt1xe1h-F)9%DL zt)1dJfA8Su$xlb@{V38tb%8&ePVh`Nv%Rbelb_;xU6Xx@A@sv`$ZtQC%U{^3mLgQl zQ}TlI`#E}XE;;KxS(hKg#a@nff9Pz%gOh=4%bMV!{YyLEG1goQ;5>IhV6?yYhuXBs zJN2XGD=S&fR*Ub8E0lcVK6rP@Wz`crG0hxIF3LO#>x;PNvb4`n`PXRvYBGI48yg<( z+N|w1`dc#eNJ+c)Y|j^=INsVN?w(7zBH5iDZ8~r?@XpN#TEi~>JWhw7O&vJdY2)r$ z@=dwl#-C?(?4{v^6Q#A+TUgfyx0lMN><&!VSC^JvlAcgG>TE7zi$6K$TizYqf5p0g zOk^abxEHtbj(szV z>E#;xpYy0a>)WerK$W_kA#1zL)9LYe9*dwApLvmCyjxaiansMbq4 zn5qAZDvKlT)=oR&)a_G;p`RcEOMeEDhBSgusdybhg~yxQlTo@Kr=$~1fAwXrXc$I&g?DY(h+Ax@g-%Vdi6vr5>F!Gop%jnvzj|5J)-nrxy|^UWDWO)zu($t+gz}LZ zuF2Y~`YgLs`LF8#_-eNL48!$~7Yvo3Pts9ya__sN<|^~kBX#Q!ZQ*~*AXtG{JS)5A zb@J=bX(Pc6xlwXbO%5n4niL9z97YZ=rRE$AmvWBWv^3&R0km~!T#}9Vt^0v1>vyu+ z36@c044y0QFE zGRvOz|2lTfl+|lft-t6Fjsk4~O5}n|?rW1djwPXd3o`R@&&NLh8kJQuJM?3#s`dbL0yYRI(m0SV`F+WOxaG2NmF|#1=Y6P$S^H&G13{}+gKx9S zEm3`vYeQ&9!`~?``>MYr@?bBUfo7D-+1yx^!&U|zKYcIx9(Xr?>X`fO2RgQ^6tlno z&^ItSWPaQ5*t!zebc4q(!-69GwK@}hj|!M0@pH1h_k@o0tqqB+@3~&AHGNw*)HXZX zBR9SNb>z>h5-bnj+6jlIx8km>I~gUB$7Q8fduU_+y8}7m%pSU(Ml3OSTkHIuvAg&0 zyE)U*s)qi|aJ&%wT&UmIXd*>Py#96j&$K?#ppINes&fTGz5eP3dBRgkw{(mJXx?%_ zar1=sk(z;YnC6M+x2An!Xaiyelf-QD8l_WNcHb20^{@_ANTwQR6Pli7TMG#EQ|i8^ zhhfdP7VOSPzcl5HJS;53UM-mPeQtQ;+sodxd*-w06uNewd31>3V232yteIC)Y7dTC z+hj>i&oanQ?-62v@RG=?;VvLV3~74PW`+vWNyIeqli+>r z&eHyAx*66GY^r6qhyJy&ZH03#;+|7A9VdNXRC2~Aq(oCFg_@gHs6Fe`KUy#@u(RzA ziAa$8X{&j4b8pu1+zGVLx|6MMWoT&MO5R9e=X>#SQ+Gzhyw}GDrU#H%OP?6u1WV?3 zh_h@!x`L_+d+zae?fGHvk@shTTJL=}6EGNh6yhC&Da1iq17M{D0>=O%(gR2n<~}5J zqi}=%Z+E&IM0eC_EJ@vy(5a9tY(_3FI>73%vJxpeK-D|B58?5F`H7b%$Ww}%sU`SDbM^WH3uSxz 
z0&}Gi6A7g=VmJ5DO&r}S{7h`XYT&uQSQN|E%!G&tC)#Dyg4Cg|0g?h@@!{}+gU59r zda=IgngS1;M6gAD-`y=rAzDpPS7%jE96c&Z{3XQG0>2g^W8M|><20ZIS8vFxApSCd zpM++GpAU*c*qq+lY-?rREFqycGcQ0x1)dB-g$S6dAS1se;eAI5QcqqQD)2{97C_-W zJdq(}it!>BTmrbbxO)2gg9%0oUaT%b_)><~Z}5Lm%@lhtBdJ>V4_2s!5|JT9Nv|vw z`E+`Vp|vq-y9`?-0eWHX-UECaAc2Oqwz`1-D%`N{BkyI1kOCkvaTG%H4f0H(2fiKq`9Pwo1<94A*T>6OZb=gq$n@wc@1Ckr z_QNk<@l!J`=}KD@|MOJm!`-HRr$*?3neDYd>~X6%TsFXF&w0CLKp1j8=YO97)y+;= zMmTfKGCL1}#wX7Ma3%|*)a2wSpk5QB-8{U!ygGu+M9WMR2@v7oRv6{V+BY(Tlw1Ma zR@vB?4YJ)p;O#)N*Aof}B8|pqz*bhsQltdJvxo0SPqb#B|6wLe((D?_9Y@F|Fdjpy=7IJQDjG!~FJpFLT zWAY~WgCsuB8U_XaQUw6j$md=P(ec9K6p=!XE6v*i^Ao5)2Z zXpo0s$!TluNW_2`#8c3P_I0#?5%!)=PZNW0ft3AP7aUoA`%nLPASYgCyCLoO#Lhpiyx zc)g+u)c({(xcTj5;lieW;0rdR|ksGQdNm5Skj0G zrZv})*a-kd)?*S1OcpnG5eX{DnMG0Y$-v@~x^2dmzCsm{%{E+MqHP33>kgv1m?y*r zs{?;fnOW5wW+$OaD18W&4>wEo)2AyTUx%QQji_zg_QDydmlNN0B{eAtsSy0twN=;17J+YLBQ!V=t#2K93{NfF4o60? zXJG*IS!7fCi9!K`%B&El1ga-LJ^O4#?zOyf1IL8e?*>PP*GFn)UsPCr;$C%oXLx3@ zh(5Q``}g4oohQTfvWv=2sHiaH-`jRc?nb4_j()8J5`Yag8p@U{GTME0&e`2N&|~V- z`Q*K`SmOQ$iJ@Nnfh@}z*2py^4}1z~D7dbuVi1@6W$dTZ*`!|6i3Izx~Z_ zz1g^N<5KWI(kCYbi60AR0~F@hTjv@Z8;NllEP3PPeag8DbhH7tb{+pnvzl8D;taLx zFm4eIF-e@k&Pxx4sk^r~2!$2Ie@*11&{t9daq*y!B|8rOfFtZflnnv`%i(_=h%J&M zs~r!IEwag2zd}ZJ9+C`UWSSOhOfR<2IHz-SxTixx5N$`*z;o!1H1gPK$nAml^qEZ; zo~$v1`J}-h!m8}ChH1A(;TOFk!4vE5YN~6|e6bw9u;!$%T!_aBDR-gSNCR)tpUb_w zWWp3bxb8c$7gEPEe1Jbvj>|IU3;!kJfRBS7wrw%Ub&;sS0HPUA5FBUF^XPR#`DTrL zLE~b(l_b0jI)jOkE>7qhu_qoq+ii(h(Elak5a8TfDE+@Q98p25#vrsIqzqKYA=n^@ zOIz0)=ljRSUuXBVAHO=)W^@?7~oQpKqjq|kY z&G?Z#7NmaASI|Pp-;71ZZDvbyZ_wDTS4|5C+HUs`GWib4q^AkomUx`u6!D+N(Rv3* z8j4ymw#bM2<~uIS4F?HE21W!dM4DLY`4zd#iU-K1>zuMcWiVIy{Vq*ATwSj!YbW(} zN}&R#R1uPH5}Yf!H7?zc*5iUKr+~!EIRHHg3Qh7{;hIndEX+xfZFM-HWqQ?DJCK1ES(4GMLziV~T0f)8{=gomG)X71V+ zTpb7%6f~AKu(%LsBh~@sJ=>k!30Cu0oFfTSeGSP8c zT*G9j|AzjE@w_(bIoT|cZ_i`v8Sj1BIUaaNDVS+wc&x3p*7<{fnxS-u z_%1vw`wkYGpHWJ?ohZt?VFP7R9{w5C1^L0VKS$LOe3O5=W#HBH?Cd^$4LK-ih{FmC znpoYbsi}=y6+8y%K<|g)G3`b0((RM75Kz8~UyINkq9W7N)3eKvBYF){`QP7_Pr=j% 
zy}+e4LX;C!y49+4(8#Yx4O<5Zxfr6%re|g}U+e$IoF#V;kBb+!3#ETD*kHe9`orfh zLf1rZKy>*Ds+rLKe>wj`a)HJhnwp*<2PzUDgizQCuMRK5Z_74rHn0 zNb7k*ens%kr{PwflvbU612HL1yb`yiS|j~_vg&{MQMW>#ZN#oBs}&QyX=&Jcx>&2Z z6(uc=uP256d8}YE-HUN+>gG6y|)xF&Lq z@^P_XOrY0*_Ks}v{M$y zsgbo0g0vRx98vWnunN*#YdDTcumu2?W51`5X(8`Dcg1f%-mVT~c$kimtO;mo9>W~H z$;@Z+iy;wEqVy)Q5j_I~_J-FWiiMa1Nk0jVGe1Y;Al>VSySWzh<+cO}2c8)}IweEW zbP27BzX*{p2YZ^80!Vb-TZeX&8$sS4&o)GlL>UQb4zVp&F3ihfY6A^~bxx<2?ftBl zQS`PRqfFP~8Ao(gK!T?I**ND@r{u7^BBx>N?&X04GCB!3hM_Um%nXW)(=KY>85{do zyuvbBwRw|c$*&(NLmxjfSIEoC1~`sB_9-T#e27jFqEx-XI1|VRGMvk0jZe9*D}KJq ztffN$ChkO{xtgUWU8C_voe=(>%dBLsKE-bmQIq1$*Vd3))6nI=oluA7CsFzsA5pKN_E?HPvV!6Q z=L%lal?$^9IY*(T$2}IA0aFT0e=ZEt0g^?6a1~XwBfTRdu|!A!bz?ntt;Xogo@4tp zH1;4U#O^D4O?TXAIShI<8tp_O>5g_Gn%m+y-=8Ltsb#Kl#&T#xA1=OSlui8nEESKg zKj=~4vLQh9Ahc7&;)xpV;@py7KU`QvuAwPNtE> zoP?4+Y<)VPv8F+Q{Vj?JW~D97HP7xbHQ5CSbowtP=LsVlM4+mp%_f-E*BkEC*xF;|i+HL>};%S&+_I*br~c0qsCNWIQhsx`F5d5>Gs#p#~B1ye+;F^34o@Fao zVu>ypjZF(OY>>S|gEb$uV^Yz2+0*4PpTp5#YVZL%HiTI-K#xtPTwXZ_55G0E6}||J zCK@+6PvH7p<$iSRlT+{KePLkF>jqawjC9 zsTjSZr?aB^%$=dtmt8vrBH3S+)iED^m+OS6$$3<{3SRU=saA#epiK#7?~a)^jg zY*()qbSF{*$|I zCYC+~9N{=5%{qHH zRt{Z)QYl@tD7)I&FWl;4%)Qm`$bXBs-V1)HeEaDWp2C0ryV7zi`Tgf(mtHfiX;V>7 zD({pHkuaQ?b&OfhYmNJwxYPO4-TXISmr$@7Uv2~rxBJMUw*TiS+>ux#NI!HVHmTD)r`DCiMhc~$ zsRhi_9FU0d7ybn30RdVO#*ot1dR=P@R8-l-4r|lIR~aICp=9Tc)lQrG_vYU{x_U4P zz1@!Ev={g{eN37(_O@jZSWX`loL=|H0?n8^PII-M)_>&m z2Gf^MapfF54~}1hOXJh0b{bc6&W6UqFGp>kxjt+0{g-9Mzht{S7`PY_(fIL(PXB&|hITxyHF?7R8??wYq7K z+%7WFLd?Uq_T|#mL3&1yMvUn6CO&&dM*fet%XY9KKF=kT5w_y*u8m9Bn$%#PJ6N*g zkZ;Gmi}@nWE>*>xe%#abVgKVVHLxx!sNU~%ux=^2I=#D}?$U?pfisog=d;a}<-TwT z2onoOU8}P9|9q>6>_LKnw=5NXo*1M0wBnpo12qN6U+*){3ZCg3A-#{TGG=&m~J-4QATe>6bWcx*vCuw;y)AGDa_p&el$e|)FLz6u`7+B9)mZrS^upmp& z!8EGvxF6`wuFPgR#mUc?JEG;l^i@04b}na!`LnxRbDPIXxo1TkGA`xOYD@jBiU@z7 z#+aD+x874RV3)REifbZRlNhY}RAEEyIZ@%C9`5xargObnHt1oz3#~u(v}+i7TLi2N zIBZ#}kEhG7KrVw_KcMW)uW-F6VwW?`V*0_5*;mjdb?)$v?$`H`aJ@v45w4v~zw-6} 
zd@20X&vx`R#q#>)c1nh?@?0j!V(k?h`}na}GE;Ii2vS76r-%m)<_}TWwBoByZU|+Ffbc zaZr)*dW&Pv=x7`wZ>*8&1`^|vD$laB0=07_eD|l#y}u7)?(W!~`&A|=Pj?L1jFnj2 zPt(4bFuUu%wUSQ2{XFw;YmIjfU0-}CPWgP@kUI92j%S6`Vl0-uae&bJ*=_GGMeA5~ z*Sh2@B*dQIDkmhB9YF90HY8*q2K2crd`>7Z=m_EifVtQU<&hGgSmGl?IMN1KbI5sz zu#F-d_uwFIG|eOall%wKAv%?qA|SHd3weGj$Xi~e{Nm6)1p;}Awo-Ely&n$;MbXo#e;`IU}Jov7TqQzf$cXM?8qcY>n z&Qm_4&n3S8{%yY7QTVeN1^^BbT!+9gSRqv?(iqv<&qAS3JR_f8Wv;@RdK*)q$cG^y zJVj%{8;>>s|0jrK5rb451)DXV5U@b?81C)hMMCJF;NQXky!?}EM~t5+_=LR4e0fPs z^<065R06A=gkuu_ID+ipF%32th5kVatuolD?}#Sn!#xAWRu5~^7>t41?L)XzFiM00 z#8Cdd`LivosH9^cJ{drhh{RzDEve3h&XSy!UL|vXdU*|hXri*If)#~X=5na`GMr3ML1<6 zUhy}`QgogxC?s$X0fW^D1rZ0%MobH^yCGUa1=*q`)z5^?GUtOw97`BMh*U83oFaJ= znBUfR4C{cIK#ExIk#O>pq$@QiCm|9+gJHez)8~<5F35SN7ipv_sIR}{UHh@*vd!*^ zQml}{10y&(u*;1Us~0IUto|tC^&~ZkBlG{pX4mz_KVJ|I8O+gT#Kpa5jP#RjQ4&R5 z8;|_80Ch1eo-Y0`ut_X#^ShQ(ih6xQq{2M8Ow{C4Ca33xQ1aSxhL!7)4j35|^5{M~0ykcUE7=u`i$1gv& z=Jo5fz*uTwHlR@8*i%LK!9-Gu=aAroy#paO!Qgwf#zs_ZM@=c- zB0x*3`qBsJiYZm-3+)Dghr?}zgXGIxo(Oy*=m%K`NZJ?7b6aQs-v5(2$Do`}U$G~! 
zW38^@T+yxpDV1p-TdbXT*SN!j*;l?A->SSE-?Zn%0(b2pbKqt|`&(8`?m2zD3j0NY zlLEY&z@Y%KS_t`J_ahG{@rR;6y|?QUdb&Q$@u-F7`8M3UL{iS*{t)Rl#7_!q$_Ku7 zFo<9dgunYcMhAabnm4){d-KE zOg$(*4%;K~hXBQ@*k45YTkLz-JOYu>f!!{u;IPD%mq;)npe@OPK+a`vUtiG2k9H`e zAG?Tl+v@wahCXhbm8olKVM0!5x}fO}jQgX?V;^~+jf(hm`UWGl@2-U3+y~$4@9z@k=e2g+?vqz~ zB=`HJ{DZIje!->t!&clx%~p)fY6pjFN9@ImYY935D_#Z+wdn1(lzspN8-rX68+fot zQreC2^w-ST3aBU~7FVBZ+5ag#W_tGZUgo22J2()SeIiPim6%H}J~eRQ3B`N^CXh`C z#fWp2#e!LUO#~J-vmo#w*R#xFH!J_>lNX)!`Q;aj%eTWeUj9pXd1_J&+C3ZXj&5l; zjq08K7MngEs)0d7y!X8xo!m~&yrc89cZ_9xZgwVoWstt2lZ4E>T7F(K_MPa*bS*OB--zm<+fR_bMf&Zkgdq!z?XFU zin--gz*5H06DAnnv@!^P+{vuU1giU7+mFdf)ytw5#J_+2U*%-(aMMKxW)f=|~aVLI~KXx@m<$ zGy|shsWZasmEW2PelSoLXwQ1m6B?Mhb4r>(R+^!KH&Xt{Bn{J2bKzXbsuNO~#n(nt1J%+V+ol$L zTW2hDMP+-|A@hwD_uXRtlQj`yk>$?zv1|FdA4mIa4uM3Jm|b&vf1I)u!;5YE0gR>| z9f@5lb-ZZQru4b1U#mwonwe2W1D17+jDd(I<03~k(Y*k(BrGBduW2|GU*hVKm_I!# zC7^0!$XdLPoX(${xN&q5MhizOW@9m6!-&{??bI5aq`9@r=D~dSAycpl(^tb0A4msS z628+q>`Db4|HC7v@st9HsTL1zfztws`zD@nls-4=$<$rr`|j3fH?9Y`qtl^{@X{Se zUQr_kwF={Gd8TKMJE7`ae5DuD^v#JC{`3cT??4Fz?<1?Oha9@!70sGSW|y)BQ@1B5 zAC~O-@l~PMe1zVl%;7+wT`+$1~BX8_!@3Ws3>)&vYNUhoJ01GuVDCYu-jMD@IE|{dYL?7{U>xhqk znUT86Vd8suM&ryd2iKDtV4At{P(d3HFb1^KHm94VgW*PH`LfX@V8x2aZaW}YJWG|liNVoJj9bd|NYU(nX zp0jmF9W6WAHJ4dt{3~M@TD!O|*n;0hY`ZmD_s;y6ySpeYJ^3(-J9n@zp_FFT8EGn* z>Ddln0<^jGfoVS=y_DuzpNUAob+@I4-6kentnb|?Umjb`0WXV}IfYRVmH*Okd?&5k zMfBi}e_|!s7xlH`{JCTiVIN(`?@l_;52#TBH+2nbez|FrAz~1(r?Wxgx-8gxlPhY~ zWwrl)MuP5EldudY8hp%KM}OO#kM8*mHq$lc-c`Q5M6Qdg0~{=#^fsxOH&mF&_$U1k z%Dfi&U8k%^X5r}cw+`*^-Y@%5QP#PyOc$g5_GX_!R?~!-C*_Z`9pWcc;8GT%Mj_Acd{aT zBjwkJZ(p0aDTFg@M{%uJ2i>;h$uR3^YnB!ADJ@z?DAqKwUbYWl~t}ZhZs*P<$2!qlTZp1U!gA1Jg|9#odDT7pZo9dq*al9 z6XrP7K`F5Ks+{LZP4V3YtN*Lgf%kRMUuw)yA;Oyuda3cWU#8=dGbtp zQoWXnfj$xT?#+*X)T+58XoG>~f3Qr3MSpSI28J@~PWuW}5jYeh-bUY%PPr2CTIMOE zb!TnrE)KW59`wqxZ^Z6;xl-I#ys`NjGqI{3l_Lq-N$&sFJH`mdap(I&A^W7`MdNky zs4|vPHhmQ6zge-lrTMG1th$$|{iYuIuoo{?(p4llG<&g4pZ=ICX&;GV2 zw_XlBsff@r4rwl22+1%RD$rDOH{6w8;pd^1^J9~w)ZDhyN0?$5zQokwxiY;Pty86& 
zR#%_KBHVPZ?AIx0KxAf$mGXbObS$J(~pZjY_IdqCkpsv;4 z$U~nr-7)p1LEU!r_C6LMA}EB-%RO`6PLgc3j(cu6hZsQ2WS~8Qr~0O$|Ei!3vq*5d zlQZ`axl%#EHCQV~AgUulqDJ}W8lfY!pY2BiCb}vc*yssWjfpp^3ZV5^Z^ATnj7+0pD{Yy#7>l-)UhR-leW}&h{KWyq*zEAU;O$P% z4_?05Nw?tR#W^u8Wo^@u>DH3HmodmiD=1r&K7~r`Wd;4CN`DohXF%r$p3h+sDhN7P z2{K^moKd1M2NmdxzLn(HJ@FsNt@9!eMlbz=+@9Nbh9Z)L7knXz)hb9ykrxQ1KLJ3CxCtklPCtM5Gh>Y zC0{`zGxKh>INk>H7Ihl>852X^zfoviQUp@4JM7RB-L|E&*RlOXP7@N*ga)LVV0OGy ze0-$!fqlasmpYRt^A`i%Edt(TO=$|ge4i=WQ6{-Z_MX^X&y{D?dUduGC6rtI_2)){ zYoP=pX7lbLenIiCjmwY7ypE6BvaYgvTy1TS&e@e~5(+M>uS#itc;htpOViUp z<;c_y#GqZT6WBx$yQz1C(_AFM5ZFaR)etHXp8rTQ^TO`sfA=|q(+`$ijA)2$N@sW0 zvvJ?_BPIX6qD~*`YZgtl2C+daw~p@Vo<;{b_Mf~b=U8HVLb zg-Djb*&pSfN=WPk<~FlH0xZ0vA`p%a$l%uUTdz5{I(V8fX=XngX|-=Th2$hW zUAbI;Y&6q!|3I8O(Qgy%iiEr42qx_vNx}fvO2muho}YsdwMB?gC^ICN+N27 zc826m5jGxGISJKx_4@S~608!0vw(t-1O})TFDKkGE~3K916~+plt;pU#9cdf*xA|j zK;*xi0{vMHa(Drsvq`&eSd>aaqa!XZu4hRmZy=Bd5bfpTwZ?}?dN|4D_>D}FPlzaT zv8c}0T1Is5BO`|rdjB2+UF#Ot>k3@|;R3cNtb5f^dH>#geI2$l(K`&mj!Nd_bQ0)u zSK~Y2U>T$+@{4N!u?L2QubAoL$|Y6K)ji9aY70eaAE$J!=FVZ(2JT>=ZH2Wy`!{by zp^uBd?#stG2r6$+PFd%OJc|z}wXl#QC7x;pf*XatJbWm%&-^f}YRcX=-hFOrO1TFl zO(_)W@?BNWa;EKqJnngkB{nrke*7uk|Atvmv*A!;_bsDeKG&&14wDFTMsDua6eMy= zV^<1>q26}neHn@R0*uNkY94_Lm;v2>@f5)&fiD8iWn^Jlwjm%X1A||#Bb>{qmh6$( zVMwurQEnt8963Dub6wW zDo7NJH1_#QnQxF57H-y7`xKKU>J{mFnDx0v)5Fwjf#*)Fu9s-uSf79ANT+G&#*X{# zm)9_Uk)1I!&(6ug&8p)pxmLB}pwvG5&skhrWw$x@amIuP>-_R5HoC;k&Cq-5!~Wk_ z{Y``_&iSmB9p$R&|KLiHFebxHpmCaH@niZD)YE9lR~eNcuvZNt1|3XtA~9m;GuFZxxQ9#fznswg(XA z7+}jK6ar9V2kys|&c}Fj_fIc*i8dBbu_IzAp-Ip0Xl_hzbIP1v<$<|QfWPhSQ&Zcd z%Fh2eth09!PEMBvD)z5#bFs?oD~pdK7v^o?Ax9%Yc11|S+cNvy2W7}}tKq;m!g0s7 zBUMc&EuoZ~+Y1%mJu#H#^oBbhiCOAu&w4*bTL8qwOi1>;KhrgfSteb@rQg_0K2s}J zIaZ2JfB(YzUff~tK*eaP3gje76|E(!ZNCKF(we{lQ z8elsTvxFG>j$t55x89JjeduN+A;h|_)1dzCEIc5X)EfVIjbxp^^XlAWv=0-w7Ch05>tFuVeZyH}RXws)Ld^)3j z!KZ3dQs(&1EtS?Mzq?BdwR%UxyOA5DXL9P*b-_sc(ue<7Fsh+=YoUc%Ia@aZ%gCVv zk)bLy-y}(qvIlCqa_?bUDJd!9aKI)Rj+{k=w8d`G&#H2n2mSzM3`RPV2p25K8%!G@ 
z)+bkrB%*=IDL&7u*@qJl3BEROFrP-lD<8DQ7Rqg>k2HTS6Q2;zVkdRxw_S*G#9oE1 z!ctLuFq&D_58Ebse>O935I%9>+l3IBW$9O)m)`Yojc6}SLmiyZ^+|p(>-b~!v}RsD zYo_S*y+Q#)uS`$9v6J}SIzp}u01UQobApnY+?Y(85KCNR7okOpOl+&EeByO7gS}-| z-(A2kJnVy$VB~voOV6^~%Rg{nT2jix0(Dy+`YfNxVAD*&qDQ;qcTiP5PyTg{S4-ex z?JmT8WwaO<4wD(5WN8wm63OhrIr!hILqDQAoPw4cZV)vsEjnDI?}N#1R1|Vp0XU*X z;@|m8&(?5pMdRAryy4%p36}_Yi$at=$a=~@`d9p>Qc9Evx-8SOL$o%p`mM1wNe%El zPYm5*ySiAbO#0lh@^7nYSL*ro>Y|RrMe<%eo=v?oR%j8$4%-e!p3SZo8?he6Ci|RD zwYZy?q?V@V`RSu_i>1>SxHl)Pf4(bjTj4_V zL8r+(wsx7Y=K#1?+9H_i)kn?5^iCZnI%vkmAlBvEy?Z?lC8+!Y@u3hR9E>6$9UKD5 z+k|a`%r`|gClNOfd~mR{+e8Q$B0U3Ft%hy6_w#2UDKwa_qR2>%Sr)lZa`>@^h^F#> zMMXc*K`IIv&4Vfq@Y@~?TQEFE&|2eC7j~lFCW=CQj5_ETNbpt4E-zXz%k-^I&vZj2 zkG`itO^JX|5Fo!Xzi%1|RwQH?tZ3T%xK)1(k%Slm5F|7J0D%hwZ5C7Vm5BZ;L+4%F z-?fGt^+re*PPU+T8h-!yWtI5#C%sp4?4XHsXrPhvKgkXCspf#C#lkYzyOU#()5xkF z+!gb<8X-YUx;t6^Um~r>JNcFNrlh2hEt?NqHIcHD5LpZ%!|+yiC^kef@k|^O1}6~G z_x3Xb?V2@2tQo4rE3B=daZYXjZGl)a+lfT6VFM~c@};zlOd4v$wF@5%=e*Kp@4X_Ol9zub4w$p=92N$lN`ZONc1R;T z>%Qd+c3tN4H!5S^MyqhW&;r8cao@SAIESI+x{|Q#Zn+t4Dkc4)jtAQxQ4h$t9dNLC zoETCo_E;q0M4tJW#p|cvkGbbfu;rXAvpnRS{pEW~kpd>DkRf_|aQxe?yIqjBJ!!0c zgfn8_Q)P7N%#wo>S5!Cf8iqTZj!k?mTa~+o6QcW?H<$vYA1??A@cYnT^TeoqK}6Cy_hS$ zpg~TtPOXVonoQz+up?ct&#I37y@Z|3ZEab#_BYZaN9EVs{o-RZIR4N}!dxrCff&;h}6Cg|~3k04e_|Vf_d<`opn`>>sjdm%tW{I_iU zo1r(Sm7HA#JMK%~w)M2%9_4WA!h@xI%RBYnb;PqFW2@%!>jR8TPqP!K1}9c$-+lD= zy(6-Jlggh#E`yZ#hEE&@wX5AFzOI=_Z^@E>U|@>By0JMwJiKvPEbY#CT7K^Kb`Zlm zdvjj()tqd$`Sy-qtQw*XaFPJ16vF@PA*(Lig{zYu2bYTsqS5!}+aIp!a{o;Ymclibcks@zVOPOkrhtoMM& z@@@ZsZ$e3C_Q*;^_9{flD5-2^CX(#P$j(k?NZBJhdv95pNx8}1Sy_cdc#pGRJ^%Op zy!YqxJip)bG~Cy9o!5CD$M^V-&ww$*RW+vNL#sD{nYXjPJ2*Cj(R(BDIzvydFP-hu z%AF3^OLJR+7F=zEONg9x?LJgA&hww>-G(P+RE`~oU7(sIpBOe2iBOZo$WHL{EV>{{ zeQVF57~ul{_i07-M;^3gi4r3uvq>b z8O$QyR3QwDPgCi)#*;R@P4uYGM@6vwq2Yax+gq2tbW2h}OX4!wdMWS^p&SLQj$0=D z5j~(hABW58|G)f0ZUSfl7V4wS+Z&-4W&ChH;0qwtpC^j4>u_;nB?gf;6+wZp#%(pS zc$Ze{T`4428Oyksr$z`tv>7AmQSC(OjeAH$_iSpz9#Gk+PMI4AIHVTKQ{+}zOldp` 
z3R7N!I2Q$fE{P45vMsBTB$u}_z^r@^U-AELgPaE5pYIX=u(P!YR(6lFoTUQWhxxI> zs<7Qq9GFG@xBR1Z4r6LKH$d@EJ=g3mnHuSOGOps5=6jf}GW&+MU4I}GK*xq@#1kH` zE~K9jO*laseVy*&ms`ohC*il>gPDZd>zg8XheGOJ>FqrN3ZMmYNBjFpU3#j`8_MW_6)uo{l*`uq6Ez$C^G4l;OBBj=r_7nO`;hqNOa#hxBSG5<+1k{FX7B_az%xMF$qQvm77VXy&@)(B-0KT-j z_tkPk%l&CKaohWs~Cq7$jh_O@%vEmB!sKI%&?VG&b|GnxzmI8QR`g z;n8qrP3DxYuO^o0V~H-f^^3_tR2m4H|B1T(JqQGutBSsOQxnpBsQk|Mys)a{(=YTS z-V}MITi;KT={0}sPbbmvF8(yVC(26_7j#X5_S+cA*u_}wVaF`oKR;J+b2la(&)UZmIezs2yCg}a z{OXRdq#hk)Qm6K)v}f##8AzL$STkQc<0~IOm&m7E1$;}Z)4c+8&9gi;84b@L#x(eG1vv(F zN&v+;8R8x83|U<{15D$Qvv51AlT7a4e+MIP2tF%v;=;-T?;axD%+>@<@Gf6^{J69f zB{ilY2HfJ~ZK8kl-362rm&zRpZ_`1kI_e{X>$a)ly}jglXV(QpdBAZhWz@v2?pe$ti zT>0e389=Q58Zq31f&T&+U&=d%Ez~CqGuXl|bs%!$^#k&x1iX{oVq0Lq`wZSa=YHN3 z@QzQ;Oge_8^2Yhd`q3P5F^0pJY>C!uJdt;Wnn+VRI=Lm@C2eDK@vvFnxLiY;|DmK# zRsR*J8c0Vkl~I%qY?n{Eb=M^7gdyGhoE?*w=pmBG`C5R9L*#juC z4pCzvk{L;Yk&QkWECV*Bm)@eNp-wA*a@zid?Rj=`C|%A+IFP&anW-};a`OVV{@*!D z?LTb3X1>Lq^XIlP`>0I<{fXVP?C1fF5XDTdJ4ZB#`wv{F7~Do4Qy}Ix+$|4tr{MKv z%QXR*6dnh48svXSfDeQ%gx>&`K9I4E$dE|hW7JH5kWq|)+m0=w*C8ugAZqb&d_bYB zPzVD|=bXN&9SPRY&0udnZgkrUQX6*`(!vVC10QjD|2HJhI>ynB%f-2i&7rcC+}n98d#S`2NztXFG{(Z;!x|4_`&Y2#la5S?ZWgp zrA@yFE<mVN4b=sE=gx{FOJ;80fDAos#-prI+2CMj<2}DWCo{^qXk*-#zSep1 zI4QUY7(ri#gEN<;^8jdI5Qg_2TwdP@fssC9PXe>U=Vs;+4ARR2jEnm4GS7`x8VE~! 
z1i=hy6(vBM^Y!&j1{A6IJ18-LPkNe@(-;(6V2^_A92jhBaUwfT=f))@guog}gByW_ zSp8Y#Al0|na$SP<>|?cq1+*XUAfgSt@LwvtKjHH58~WZdcAL9F!smq zkc@Z$+)v8lNbQ59x;+w`obf}A?^-p3K z=I!ez0tFYQ=!~`Z(sxb|%+;)}ZL!(E?guc5@x3L%=I*X1_VGpy>qwk3?;&Hs6aXFz z*jySDp}n6sfP8i3$&(8ol4M9Rh@|&1O_6jIwC}KpFkWDoPM74qO@}y0kJmg_7BeBc zCk(V?*vjsEMUyXetuMnKj!dl4*b7ZHz$EE#XnpA11%eN@00MT zb&Ws&oZvOAX9V)&Q$0S!iuMQYP|^;prAN)g;>nejw3p_tVxW5m-~?N42>vuEgcf9O zm_}G8h>soh^5ry8p7{}kKRta9*<6ENb$7>KonR$tj&?mWCc?2Be zgi$0JQXc{Ug2ath3E#ilq3?`x%iVvD1t9Dm5RWz>+3BxTH9+D867?ClEOH>c2VOt~ z6aj$0GT=J-^yoSS^`Z=Kw4T9>KoE?vbs9Xui#un)^77xBx&M=%V<&;)FBp*CEz?18 zCA_U)cwsKxi~uK?j{*S>AIMW#gVNERomDlO+|g+>6lImTk<SzPj8<5*}?e6LcwC-DFZ>3E{$?>xhQY@9!0m?alQq1KV^Lb!-PDW8k!ZG_1lu> z$`#JGZ@7%sy|Vkt8>Pc-xh22I@UcgjfbbMKMTP}!iP^gh@3b$vb_BQNZeDt$>(vkN zdF*P%gqMEJQp3UTUtT&^hOR%yI!%3c7y6D=u9WXG7refI|Gp8NN!TSg-7sF@7NF<3 zzXkL^#4|@JBnaCENkVLTLe@EQlZjVeJMz$fY=N%Xo72SP@i;3{J#9Xdu*VVK`|1TEXyFYogpBmV%&uYgR-8$S_TJL z=8A4^wJWzlPmIQPXiq7%7$m_U>2zg{;cMz8gwDfNqCcVA{0{bE6t#sIqB}sEw3_IG z(}yARW6i&uVEH-bY;RQaKm(}4!oLbBtB6AVY7xRRWlpAP`D zkMn0vs}gaSEEMJLh>R;6hIz(&oMgYXPxC{)zUAkyfTgiuvP_b9`j^Eg$F@rf!JvC* zxBC4?QlPG2V-0)%O}9sffns#IcH5Wa=Jcb^?&`Tk{W_^UP@S2WJiw_u7Z78dF@z0H zrL*VH_x+&OOLUupM<`}$Ke*?M>hGhOukR7`qB*)^;Pp8vEAYMScO$s4&hem zg(@<;_gmK??jnpH!K$cibr0AZ?L9rMV25StYJfr6E?{%^CFB%}W<&>3rR|h=UO6aF zK$S8?&wVrg{8I?7LoNpR`1tL;y^0y&wr?*IO+qN@57nA$pn|3Er(Yby_5UjT$*u*}YGp=Y33@bIP@24SN>wkHPQ z6Go4daBZl3gl+1Gq2YvShw|)If?Pt*PPIqyZZ{8~jvq*G#X#+Q>6 zw2m9s)O@|Rvdcof z>@Dy}u`gj_Fj+C-QF1X6G}DRB(}chXCFqsgh`pp`JhfQUI?iS=F^{d^-isP}bW;RjH)+j6 zz~<+vT*t!N8|Sgnyi;E6Ij7@=O^3~OpNGy!u%4I^xu_#`>&oWIG+VV=ug9OG2iK>X zV3S`iBRgA!<5V}L`Q@yoDy1A-&R3kVt?9)N5|j2DV8fabAa;}sCQ!HK4P9$ z_uJ^vhg$*COv~nG3beE1Dmeq`Nfia(1iqLxyJd=(A_fkhdI+)NPmZDu!Qbhnzz{ z1GH<}+ge3n%CRadN&hxXjDtYe^*DiDPL zA8v>_js1?)E`L6;Z+pZ~J3o1Xb1CnK`O;$vOqN);*^NCEY&h`t&XGC;8wQRk=EEJu zI=gttVsCT7wi7}JxiuWtugCGgBF2cTg>~f#Bj1fCBP*ff-J)UZstoBYicY<>n?Lg7 z{ZpPlpp5Dfv3eR+@cJb9_@wO6QgWy3yI0<#89jC0tLNCwx9NuBPmi^9E!C{Iw9b{s 
zs)`sJ^(nX|te=;f#m;XyQo}DEF-Ti~I3QND>(VUPO_Vs>Kg9IRGbt-&CpID!YhD&| z^Nj|##VOWz*UmJv`*;UoHt&|iOtEj1Y1OH&;<*nC3_((Z`+HO>DV#1S> z!v5yJSGc#~c%hmB=vIxu^MO+F{I{_h8VHBHal$Q;TU{$sW54pj>DS?~)Mc@rgY^0R=q1S-uBe)W0|^9|U5(jNtd5b@XE ztyqaVtG*82F;hUPXmy%!4}hpPto89aFjz^09uCz>wop?~W0fB!GNW61qtur1AwavNO zO(uh931^Ios~xt2n?mZx3hQ>F^0dtOnsY`k?mSqy;Z}E&{MubWO}O~@6iiL$qAo!g zZ?XF}a;5?+I}8S(0}(`$yb0|)gX*>Skc0r#8+l59CvkOoG>0`99H2`m{xoeF*jEadF zYP^G6n}H^Q{g$+qvr~T zu>m`J0iK0)PxGgqwG*CCx!yYTo#%#S-0`r~f{Y_)2(}{y?u=OzFN9FKlkhFsTb9W@ ze)vVxj-K7EM(I^>V3rm16BVHFGIjlpFrvM+hNCJ&lX}>2m6K4Q0>ulJM#!-f#5PYM zaQhx)M3qnd=A|aexRn$Nn(f-X)tYjoct=f+%&{s@DH&(j82ga-vhHe}9$Q1^O(GfWlPKKvIg{$$$!yrZjNWRyBivzQLk46+i z2S!%V`s5F94Cd(u0M8Nn@=qzf&;LH(tQ5f6TdTG0JDgY z?NiW=Z!JKJ1n891)DUs@*?Oo9g$=`-#0Dv)Rt&NwML8LkSyoC%A4DW2k$N>Vh|~CB zeIu(i82R!nFP+c3Uf?KL$7jUd&`E%ILu%Jtg5|d6-nG@2ELe-Lr26#r?c*fR%?|f# z-t9>!6VkVs;&INOXt3a0{9!vk)cWxbae1sxM$;f+SW2C1mPyr8TDzXVIA2~x7hk}M zF!}KyT8I6EwkrAu&OPB4Q~woUaE4;Mpf0Uyi6VwF?9xnzrL7|paHA&({Rhd!+NGXf zv4XTG0-rrYIm;Uem>Aev`cd+gJ4tcmxc2VPSCPM~0I6Y|_8}p?%9L`szDJV90go-9 zlHDx1pPYVnBk#4EB8y$1Q=9K~^vg2pY=3^auB3Euc_}SBNipH@TcQtgNC3ps(LM(E z64?1b5za>&qJSDra8WA1f9v8FCA22Cu)%&2Wskq;N)6A-ug|P>BnG&JP~wlf{kQ?} zKZM7LuSKS6b+PI2@W(&c4?5cG>CMWS*ecWO8Qfd!*?LxAr!JQKH)SI2+#pl0{MgeQ z`iX4=`Mp!=uOtQek}GC=j;USH^{8qy^?f%^XsM9w@?7ocR72l(^QGlHh3>ja3MwsQ zUfl~vW&CJXiRi4gOB#dHha{6r7SaiUqkMB^<2|$DojY_|1qQqhlsTCoh`yYyF%4kB z3yGl5xmj6_aKw4RN|nviLJ_K9D#9Rk8L4e_Nr@K2Py z1$YKB#@d119Ob}5w>pxS1nKB-R5k&43KTsBc*WqTeDt-O?oCF9?3qa9MgSHNHuh2u z=6=xa;JcHc&8t#`P_gSpiq&N6WzyFSF0QR9NltTI$2L}t!U?T1=CcgEqovm)T$4ZQ zndTC2uKH$Q_8AJkL~Qrmla#UVjIsWKhwh5EG=q(vP8hzRdO=b8ri5#GGgrf@FItbA zv`~NE6+h-IU`gpo4WN7|#Ib_x5;_xN6i{45mvRd1Y*8pPpj~Vb$_(iVi2I5bEOLf} z8eUFMPp@u!3*7*C76ITi0(4>|ss<`D+Gddb)H?V$BPO2J*9U*WirIL*a#k?cCI#cs z9Ib}xmoYKg1@8C+1k=d7?C;#-tzp9{aQ<)xwf6>?7-+>Ml|pmuyqF$?RGDxFo`M3_ zNBPVZ;31(=2;e_y+64wTTs-=B@6#WDbT!VzLbhGpf}yldEj+E}L2w?}3*={bXMcjUL|91Tre!xfV&p0!&ol2Ev=%5-ZA71?a*pgq`FYb5sbvOZFICj7GdnXU_z=nBVAWUW6b#?s*{U9I0rP$?WaC~5=iTzy;R 
zn?`(JgdR)=f?vF#K*KB`Lzn6TwbTibx1f5FUmG8^7TJfzAqcSuqjp`qNClF^3x>}+IPmaC`y);Yaghw1DA?qVmFIuu1 z9jMnRY21_^`jKTK>1+i0?W5yEzcTOGEX}LNFtI*uA6h);!iY6!gj$fqqIbqkerr*7 zi6WEavf+)R2o8ZWy_V`!Pk8CdhPtzn8zxgDCF8m3(V1Ua9nNRAcy#lc%v(mE^Q_-* z@=SJ|&R}Ks=vpa7=ml0&zbmzTnQj{mk5I|s|FEOumR*kE)Napy7OPu&{hxP8~^qqJ;v zefe41-G|fUZ|x59xgJCtpHnFO6u{1+Qo=yxM$|OPIjfsb#kD^6x~ok~kVs zLeGUy;ct7(u>*5!itrRhrzfsd-`)B+oMFboHpqDlSA&}hEE559b94KWe661mQNb>y zsFYUy?1K6e7X!+Ofy&yP#FsC%=If!lMmX?Y5YiyGAXG2HNDFz@BgQK1!^pWKOJj@q zLasSicS8-3g`wEB{7iH~ScWOHFlFE!mB)DgnJC(2gJQNnQvrkw+|pF*p|&*QJ{`NidC{KYz|vR6(W*1iJ zLJ*kvb2TtOQB~!5$QnzkMU6^C$SvWDch1igN1WNIwqd(t7W!3Ij#mlp z0n8}?orVrgfIeXlFRAq&j>?72i>mJDn=RVlK?53cLppns3kqdqSP!xQ5VVBC2!S1Z zyn%HOWKUGQ12q&))fq3kP#PJ9Bsq4XV@d_+0W)?9H;*QJq$#3|%)_6ahXF9TL#~hI zeR}x~2QJ935zI75k=;1VLhgZQOvne+6~bpW$GI(dSR_F{Y@&;ATIC_df7`B>&rkAP z5(OD3uOCkLX@5Q?eZ1H7s?y+#)ZTaR@5pFcC@g%X3t3EIPoB)8?DtN5wtMv8T!SyA zI|#D?YF9P%ly;-hGOEgc^!P(s2(eV!WBB6r@uEgBY`}*}ZwHpK| zha4i1o}>if*$nwRx;chMPY_4|54qj>u}%_ndsW#G#q= z90qXSsUZdZ#_}7=I4NOS9L@1!j8#R<6nK1Z%ywf z`L?iOp`vX4{B%F-!tA?WVb8A8e{kN|G}okIJ@gQKktOEVqfr5N6rdL(g!!stNLREH z2g=~*%5LaTqz{YBXM(P8={RMGQ>RO%>h!mh8g?hB|d0z^gF z9|K@=PK-U=cY3TDt?SxX4G8yZ= zCm(Kh+lW6)YXrncwFE_r1Kt{DkV>)o z@H}ua5ZPcjUk_<6I2hO;@*e$yg^t015-5c9-y&^tmLw?ShW-1VJV|}W*z@;%=UNLR zdyR*yoYoE6Od0zV3$%s3Yn1&MYEQ1q&EmbSTD|t|+X36SxeSxKHvy{-6Y2sZGJ5wc zOz`$Dq}Mo?HoyJI0QL5ZILeRA#=l*Lu{UEfXX=7`C^&K&U<;&W{`fv$%e2oJF79@V ze+V}cKxi;SrUaOL71-B7jSNy`@x|MqEkJn#n7c4T1m;)yeMHrPV_E=gkpNQ}(J}-> z6hKVZf%ZPKvRpo5eFWpSh1K4HvTEg}71)9SOI*}Mhz{I#``>i`eS-O&Pi*>5=VYEU zCP(#RVflBSUX9`86)JDWv5RRunRnMi-*25#z3Ki_2xrmX&Fxhc6_qEvl;6mo?On0) zebIAl77wILKl&mcRsR;^qx){T#c%JF7M5|uf6p;OK!417bH#r)$TFU_5| z+5cU3ptgR;9OdgSTe&z(3)xDOcAJmexM(P@I^>o7Oz~F1ZRhr}*t5JBhVNg&d1<0y zqfq&Qor2VNyv3Bljyus#6upG5y>&;^%*wGzxAy5RBoCRJG(0FWYQ(_S(WYIXHnILx z>sKBpVrw3Wr`tqma??;{wk7J36fJKhd@!0ktaX+X~2f}9gZzfIs!kIFV= zRRexyv=?0-NB;PmtX#znBMH?dG7U&M;a*F=2-Jm^YdP=+f8)6r!2F$*RWrutcpqNK 
zC#DE%oJ!t883I?%OMD{kTH$ujaT;Ru3q@H@Yng~SGYeY3TUgT{@`Bzc(LP!7>F?j2 z3NIQP8(nOSDR`aFTIG$ks$v#qPdz=_Y;7tm%kc5lV2JSTxC+t&gW{y+%9;l!>K8;t z78N(IV&|vFx@4NXdDKOnJ4Xfh%_)9U8c>|cW!84@34gp3o6`&jRX&}tI|Ku_F zQab~54JetaRgRP@C2}A~1S}Y~-+Dg#NbG}~$K=}A)1G_?)7VLFKBA^*7H;!{1mpzS=S2j-Bm$fH{ zX?0QE4ykYMZ$Vz?W2+>maPJ(UKN+kg4Ba5~lD^I9*LKvM2u(#SKYS-kUo#{=#`N*l z$uP2L8=rE{`8Slam7LC8(mlga@!?<|+mSvH#=TJ2@ze)aVX&1vr&Spg{O3Bpc)w?l zlb*G;4^lm9eDAJ|TaxETtvC8p_1hJO0{qR%Qh49*@#ZRjw%40I!t1)M}&GhuZzL9_8S{@+cjd09MefY?joK}? zj975#?r$DT$~f}&MM7^cn$*n6;8h_$1|rL?xeJnTq~nP1=y4iVR}=qS^qvhm7yl`J z$ed>D!WX@D$v?0u=t9ZM#K0-R8Q5rmEByV!Bu%-_8aD8KGS%ywLS01I^{iYsn4Qne z@tg~aBdKEX;R?FiH$Pn{Mi@|f?R?G+h3xdy%X=DK8Vf5_0S8cr!okD&bDbch=~8}P zn&SCWA+!N1(fk^g3M@1!5j8VCm3y{^(|ZDkqD@CJQZc)t1A0H-sE5t69m9nO8`QP9 ztZK$rg7xH;_JeJDVTq38&)fUwEQkNH)2e-;xV7!DI!++Yu90o76iy_>IBNnnQiUOj|VZ2HJ8xf2Jl7X8L-nD$_0X)7}Jzq_g|IzqE zoBZ)|V%*c`!ylfr958OF#0MV1pZ*yR05__{9v3M)g9~lw2HlJ@a29?IP4eOGyuG6m4#}!f@a;S}9wL!9=^3&K=lSlpmAQu;{o?`%f?i zpa=%J3_#{nOZHzVxV_Jr>gm#4!0#(06AubM9o-k*SXg5UgI**vQ6@p>W@r&Dvx(*9 z>~u!Vy5_1uAcmmu>{A_b8^5FDfirLPX8q(^_t3xL z1^l!9$$Ahj_Ied|GJJpMtdt;!nZUjNcd&ZV-wro)C?DLAQb)blRle{7Dyp?uKWRj} z^m!=X5gv4Ju}1_~gmGXh7i@#DSOrmR8u+Iz>)F6u0iY%=qW^g*D^5~L=g^S8j`P`0 zb8Pp>a_BhBrAztpwj@`v3g5_a$IQ8%>Bla+ofxE-&AqB2OWiEX^?XrA98h-cldm~$ zBsN&IYHuCZPj(tSsEFrx!2ASuqY8<^zjTN`+<(?d?c`s=FKtmQQcouD??{@+^t({L zDs`4IzFubP>u8Mwmm&5}d+M3^^s3+Q9GMO_IVtx?RL7NdP}h@~P5E3{Y^vf@$rp7y z0$9Ybj^H-wu%`7LtJ33hI04s*J~^CXr&a1kslQCH?!BH(-d9;xG3i4TBQBwsAuRhC zu7IT5i8%7Co`prK=R@Si-^_oR@+RKcr`PJ^R?XQJzSHA8|NV@f808o9iP=^8#B7Zw z4S4AN!ys#h_kfj)>*^BM-&LS->v;B&a0}XR30}6*GD0DKA>)d^oyD?)k z58w}Sy38pKKUd?x`Lx>!sk!)ZpW&CUS=rZN^t2tFSb`;qI&<&gHi;COW2A4#QbyES z0qJB?ICCs=7xT`X>lqK_WNW)}c4mYaU7Pi6dxT?{Croh7%eaB9LBpqa$o(4Y<1s=Y;vOX|J95VjCbdoikh%w5?i_M;{o@+dW?Kuf|Ri_rI_OF?U zz#??tubpXMSLzl~zOeUltR0ux)LNdu;QZ#NmTS;f91&rP(2EEG=8*16aSCQ)j=3e_ zoj5e{aR1}6kTz#|$bSsCxTGW^Awhq67gAi%yct-WdDK_`eW}pK+-40Mu+5lMm{?{g zTe(p`Ccx_)=yNiHhg)oU{c;;MrOvm^hsV@^O+^F>uHS=R$LK(bzxI?XsR@~-D34tC 
z^2JMi7~J;_yvc>SGiz6 z!0}YM+2;EcKjV=4P2AJ&5{;g$iYhRo%A-inVHST)@iSojSoB>P%xCec2G;kt&;9pp zbb!Yk7#fN+(8#Y~I?;8t2zziqMNLhRu6EYppKr7|iveo}fohk=ZAx(_dv;DXy*bYw z2Usny7%q@MpI-IR$*BHX-S2o}`&y$F&c0>X70vorC#6Gsj>&01Vc9$bSmg-T18TSE z3pH;D`!OlpGRnlKEVYFL(b~aD!=8gL4>!m0aDUBH5F`TG#(&4<5k6;^4E)*?^}?qT z!;LC=oR4kCBsTB`+Frs{6K|$tIe_txOep<_`$qJD~LMJvBgIT2$&~OeD z-xaxJRj8=AX>UVn;}_7c8W^wUrRFnuZ>`PeT9T~X+%spQx36NpbHFi<=SG=G!C6~= z7AmW2NnudD!`y_514l>~*g_r6}Q z#gA>)k=v*LyW?7bvLG-7Iu&FmfQOF{l{cF0LkM8`Pif?j4Q1NtoBv2fTdn?hZ06K( zOpf=C24CNmg6*`b^k}m}N|?55&2avU&pX^sVRFxw_~quF*Ld3$=F-UvqY}+9T;clI@&(r;sqBAH4TzkA*dU(=@js za)aH)xivL6)4n%pRQK~K7=G;JZNAMjEsa&R9AK532WaJ z*fNGqgN6%~*2mu&4aE;Z4etxFpi)rBi}Eo`Jmx|y1(<|R!EmSz0<(sj2>+fUS%N4( z;X>4h)b|0eb*+B1Y2Oan5wPd^bY2ZtH4Q)4ZQZD|@VXf9Fht5G=z)}-IU zu2x_$ja!L*uLi9tVC4p8VI>oC3$~WsBJk&*pBvQ6%H@ zhx!!p9)LT}%k(eDg#H=WfJZbhoO3zXF&fn6k6Tk+qm0jArIV^5m)CqZ32&6yg&m%Z zb?$w7q9PV6(ecnYoXfW3HJs5x_)uR3ey+Yw`}K2cb_v|^3u>8{#M{kxN}TTd;%S!M z*CM~#Ev}JU;RpzTW0VyR}E2afJc&m@f=G&1Kw^HS{Xx?nL*-qp`5y6%!CVvJablGe& zrawEKoQld1=4L~uoK}XZFoggQ>E_keGQ#W(l3Z>-pa1LOoZ4n|1(#g(4gip(!k$eW zSL*f0;J8*~Z!4yI+J2eRo6)9a>j5=gxGAgAgSo zX$K}Xiswa^q==vrAI}J@Q(?OF-`Q#i$XV){Y7p%Sc21CIDBu|yrbjnYswjI;_ zH5L8kp`$1u=P-p)F@4nYDfvr}wKfzssanf0ZEQm*9F(sZ4D`$!Fj11Fltf4kkj(;-^!||R67d822CT;>6y1l5!)4}crfhx>Ki6c1}i4ERVC@e zt5ZUZJa&#l=8OKgwNd5NHZ9}JYMVJJ@ zs2SFG8uJ4+2nz8@&50FW!|R@K2$ahaL^;^N5_@vfRD)ivpbY&UtG} zQ*f}GGr)P&yirW~r-hQCNw@^~nlcIfX`|xZ>pAUBLEjR%ilUBSUYgpxA8j9*WOa%> zkD99s&w)K51qS+#q9sEo{Jb-T|CX70IGL6#sRW;OLw3&pc%aum{{>rjXs&Qf^_SAg z;4EU*<92ZT@{8Cx)$BsYGU@gtLoS;hm=CdePPltf@O6k0Gru z5LOO!CG;^CWjZKG8Zjdfh`YJDNqzI`-_;cIXLluowy~n=Ak>n-iatysSU#)s7QC}o zgI6b(;anoC(aB+@DbU*;SX87}IcC>=j1GtN^*$xB3?-v%tTg(qa&kTF>K5MPmiQQr zSMqw?MU5ZZYABFXdN94{agbjDW!cur={+#8k!lR%1zd<)XTf{M#v9D=P{c*E?0_K4n&qzBjebE>Jk zL@MS8U8;$dFNLzvF@|;y6#iM#-@ACZ`gFgU+tDRQ5m9iwFn|#}V$mY|UYN(`+Mc#A|d?s!rtE{XD;q+yvzu z&I;>9oIpIRf6UY_m^7fSKaMhi6fo01yXcbFskCIf)oT;GT?pzY)6&VjdA@ z3ET{S5Zqm5iu=26=>v%g<@&kc96Ed1hwL4+tC?hr)Ly_|STT3$^R1%Mu57t_1Sax` 
z&!xS0CB{F2!&4~={&d!&vKD#F^&6Gd4L8ZW1a8qqS^fUJ13v&c^MQ4ADzX~i+>`_A zh3SX*XQ0|41=1Xl4ZoB;AYHBfar6I9N%pc0br!kDyk~OhB%T{A(aLGu65n%p-=7Ri z_W3R&z<&m?<7Sf0z9N^vHL%-LJtg*)`Eri5_Mc^R1H2>6M!(589(CoO%C>C2!v1>> zvJFt=9Ow?q?gOch0E6^IK*8Z<`nTmXW)0C(0Q&+3EHcV?FjiI7hF_XBSJo1i?meJd zdtmu=K#kRbku4{albtg;($!br>dHGKOYZaBuyTH=J5P;I8j;z~v4!Akxkc0^^r!-g z^g<3}WN_Tq`78!QFHZM+?0#X;v}paCo>4X6bwG}yg&s8-83u!H-lM-0U-BxF0P)kv zVjronnwpwE!ki7rEJs1d1D-97fa!i)XeL(`7I;PtKZof-CqFxs5u{(DF3k#!nkbzM zsv$ikBwn+d8&fYIB&`T<``}OsllAtpwKfkl1NT2z2i{|-k_9L{PvKrY&VwH(+Rq9% zv*GIaP{F@nABn%4XqALtnxhHmqadUju|r_x4-V4& zCn^L+|9Q^?DP*LB-V2y@`LNgqDrrNz5d6}@5HCl^T0pm;Dp84VePX=hwH#4>`z}>x z*JrCTfXAhKcd5eUM>{=b*l5V|NScWHA_ZmcN9So7iQa=k<975qbJl%s!p~T%x1RiZ z{SVly*haJ@^%Fz&IWh&CDsp{0;1(_wTraF1xUY~#j!9O#@N;7J>r2p-fhDAv`7cn7 z4eAyBUE=Le83)4GL6t^|h`@kd`-0Wx6vPXc>DB`=4!yf5mk-g*rNEmzR6;~lI8DCg zw_$NXUFeE$>|*S4orgWwC}nN(%G>T#{H|grplM;efROg4@c;?{J67Z0u3&US2i;sL z_62wGy6G;dGpkDwG@ zR4|jB{dj}J+7^&jBMpAy1p{*pK1Tn4f&L^*eZ`TEBoguUDiNLAnx zD2}cQ=>5g4@9+~on6!hF@m_zF?M? 
z!t+3zTQPT)YY9A72M^*jv1XGQW;N;*AP>=BI`(%X?#G4iY71VNG(x34b?M6My@Vr*fA*@hof+!|ACdN&!|!seeJ!+*!8yY zwbVth6~7XI5zjWhPzF4;YfS&#TXejx*u2l`wOwWWK33L&cj?SA;@>YMMD?FTl`H>& zFaBftFm?=&A^w7@s>h5o)!dK*EF0!pU>hlJn1-GD`V1X4Zi`i-7t5uQt> z;<0y}lDiY6KXyZ9I%Qq=PJzT7V@=8H(Rjy_p=k8WRSg1ti($nQW1B?ru?pxh0Z=ao>Ysp{wi0oD2ZQlVV` zRe*v)mNd095lgP&jZ5kuCVNzjove)eRz(fdBlA7-TLZHz&HVD597_EnAa|Ph%5*9F zwNnxWdBo%6Jl(e+r5@8#EzOK^^6(j8Vk z29rnZO!jjgqtaLoC9J@Fr|`PgE`bTda~#Y(%sR@}D@#-0A@zX%cKK$^q+NDL{y4|& z5Y?5s(@OBI|9ocM7>DVA#J@fpc?+yriP0`=JBcA-JnvOnDgxBS=?=~$6Jc4Nm)d-6 z>K09Yvh1avHP-{{Pg$z`myaIluFRVPLEyzY_LE-uRHa9 z@wbG+K*kivNs7Jo$Pb?Nz`76qK)r2NKu#h{AHGb#Hr<2cbe2;B72Ypc?Xi{lDH3?61c@bSgG$s+0`agR>J$q5};Ar=$@0A89(kzcqB zY&E#atHgBv;-5S(MO1ZbO=vx)Siz<&eCt?D1w{W5k|=)WsMNSuoa;oQX8ial|1-5R zX_QrWsY2zg)x zeB%KfEPzRJc6LG_v9cYps+fZ96e*k$s>68R7IScLP`>4~HXfpDU4@T9ZiO&FZZr{^ z0)Ch?0QDQdyETHt8A(>Je6ONlE@K?GwxiUz9d2mt-)cVbDp~l>#>yhaw;P|0+&FC> zli2yN6bhzX06-i7y`qZQbBLf+$J#s;bxF<b4nDOwb|gy2I(T+4r~+I%hcj z;xhF@?xe79Shs~as*Aqu9`bvl8abIci4SkA-HDPiJEiJed>1a;TTX@^iI2jO&n>9= zWaZ_-AAMqiIVB~9BJ=O-otj||s|NHc`M)Zwh(YoMG^8tlYxPyw7=o0h@veY~xnbDF zZCXfhegqe3+!HIR!o}Ud3wjzZBmq10y z6a-1jw~2;^YrTD%gt%l2tN}?n{(1@cepIHI?GVBX_f6dnTv?}&j0&XZlHy*Z-&cD> z5BJ382kBj#jyssfwfi(iet~c71;VuQi+v7k{2oo{lEs?ag%aDN9I_ z3+&Xcw%sC2BWY{#^4O6+Oh$aTLMc~H==Y-qpoK3xqg3T})Malb^E9tVl?s8lpVyx2 zuXp-)>ZX7xN84&KQD;~-dRJ86{$g~pnJ_Ua(iL%YbN|t7Rm~@s)SUJDd)(>8H0R^4 zfzTap=E9ev8PP$IYRYP9Q9~y-{ALDNKn2%Ev^xMDMDpQiB|o@d?E}FW3=-ml-78X? 
zBM=3mUI;MADghGc-j?$-Gc%)u zJrofh(T8Lf%Al#{0AsKL0o^%&e$_a7OQB+gURuw-?&Wvzc6euAcCS60^%!AMgUa&P zul}IKF$T5o7a+1gsJiO>0kVgMVxqQI3V4g<)&9OMzhfDQS{mrd@lw9dtsch_^x&2= zi)!btv8(uaW2J6#?&~N`wcaxT4h(IN)m*oA%qx zD9tfoNWP+zpjb1p^p-CjUaINpO^xJMZZd&k+)EB;xl8^_>`nU1KO(8vpdt`)6?h4%qq-DL$-kJ#{#TKkfU#n$D}<=4Xmomzo=|>?b-!Q!A<2-UvlvMqdi62f+$CbF8c&3q>mQ>r*9XEU%m}C#e&VJ*)RhmmgCeT_Vle#(H zK6w2neGbGhKIRKayZQA+9h^)CEy%o1y$4-+xx$R z;nc5#Z-g1Zq`cG>>`C?Vv|262*aZd28xD4PAWH0GRI`Kz$Z9Z1Fx@tnm>5m-1;pJU zT?LK+P9UKJ#tT%1&6f$&5qH}m-bbwizvgg2{_IJz={tMXHJ?JR4(hCL-65KGo1>-5 zlYK>#>6it}{!7MQ8fbkw#`izo7%TU8;YiK6W=r&5W?x}?Ce;vj0E-LMUu_yw!ykqZ z+N!p`-gL4F>jDDo*&`pdnF2n2Qp;vXQ|KxU^LMdwM?|=BoE+_D=EJG&zI_oyp3Z3h zN6I;Pm+63X65e6%vGxdpKgw)n{~j&o!DmEI19!FA@qG%*u`i(MN1U|f-Bd1+2+jD> z7?^b$C8z=Y+8X$iKuG=*GcRna>{Bk|zWX?2D<|S4;3jeLMI7hZ9^8CQmwEh$Wpj)H z-_e%|CCRrc8QL8C8?}2*dTT;C5a!n~ll&`n%(eg*Z}g-PC8#vuoMhI1VijpWwVd(f zBCP{@i=vy-eT`C=jU0s9+fK_qE|xR4Tdc9^ywB( zX-Ll==jhC9jG51SHLU3~J|!w%I0(?|WzAIY?$bU>={b*leYr(%4zcL43Ts<40KOwW zRHm3~ag$e60@rr#w>LZnAdYXZj_J-EQ-td#!vo&;f8|$rz9)c%tN|FI%L6uf=2l`bxHyb# zTpkHOvM6y|zAyhwP-8@}U3`0f<2o&WaPSXbovBf$MKN(z%A%@uCrcg)BW)U%$<@1q zX3)aUpFW0b5L9(iy_ZknWl_7_|ItO zl>iQO#Ldti7Mf#obONH@?7n_A8FK9ZVeBuXs@%ReZXA&kr9-4!r9(hMT9J|tDQT4M zMil9gkS-PJ1_41z8YKj!8v*H-l6vOt(eMBFfANg*>=$R8;ZYBJ?{%-W=A75{x$Kzu zUk_(U+m-2>mj@p%_>p3ACLnjaUy*Qu9O75fv0)bqd~IcYkIe6E3hM8S>}?hI9}y#7 z+`JB#*m6@Bb^L=7k<(bO61_4aJ!vls*3_nxylGzn$-=3;JYNjbB|9*_Vubj|rwkf=OF zxeW=Y&2X=NG#8aJzy)zmcK(>@*T}=msQB#F)a`Efk+W&LeMGZc@9G#8xzpMhk=r*G zm*_I0q34qv34_O(v@Yp+uE~01m`oJv7kw&hxrkqT)wy30XU6U9`Wr5=EjkkBwcYdf%^oPuj=h{X z`W-Kw1=HJ<^f{rEUD1|6wrUrl~oB3op>%B-`UrEjq zZMeKc!_xn+rT##cY8C>y*SFMmzeib>J4;DOhm^QDHY2u@KgYK^fxuyJh zQMMW3z`WuIn7TWF-)IdcQrZzdM)fBIue$e+$y|U%8)>j#&z#Kg=lhRwZfmGF< zVc%fF_e@G#<;ZOkn_yQ>(`eo09Md-|q4iQpkJ`^bVLyx?1rv9wk|k~#fzs#X(LCA> zGu+Luu%fOSR)uOmt;1sS4Ab|KF1+d)P%B`wNN-r441yaRg29wu3sDm6x9Vq`;t_9a5@1+;i()-C5B9pX1dgLrYFy;^(mO<(SrGA5 zSzX=q_#7IdSwpA2iQF%6F7>=Ic}9^=NgT{@jbtPyvocM5?dt4vd&P0#ybc)5SY84N 
zqWf*{Tk)p0m4iD9S?(NrR&2{#D3G@D6_pR5QAoo8Y?#Qb91$T-kcdblS3Jfzn%;Q& ztj@<;VR-f})i0Bo*9E54Ly4t0eq~(3Rxv1|o7UZUDIRc@*so5u!@&h)(r7545gfhq zH!?C~2w6y737kO?`iU>3zc|*K_?{iQMMz6+XMr- zw{PxUh28?dNe4boK;a4b!3{R^7iFkZD3$Qheue!KkK}S&d%)f@RpA)^vgEYYD5)P? zDQhOGUr=YP_lMX86vXzQ>K{Dz5mfT6D(mRr9QZH@dast9)U-qr&v$UQvvBOA&Ahe~ zWy4lAxSznB@xqJj_s35IL@<-N9mJ7fGR>5x-!rZM^4RlGg2QUU0;Aw@8>m1=qI}7O z5hL%oilI^MI-rkNCrR`6sQ%8$`h;93v zNgyKRp+s?MMv^4jmgbRKg4|L&6y2CIl8!D_MwtBB@-S`S)f2EZHPARIZivq74Q9eY zTcoQ~MlhYuQC4ts0qKSXCFDEMMDyYVXnT7Q4K336J*-gI-6+_SuJgL>O-%i4b?^Q^ zy(?dG3prt-W&tv52D$*sh>{(O`pd*E7%n0GU=);qy8ZW_$DDBtjT7O>9#=1hH+`a_ zXOpgNQ33R_hAy$kUMKLATXPCA8Np^uE1uV6LA%EMtS&&f5~1ZY?>!ykVr)>>_~zoafJON$uUd z2qp=Yd=yZF)4p*e4iIXj)f7{v*n{L*L(n^@cKql2q0eBpABn|*%M*mvRNN^}jzMsV zNO#+2Z+G#l)LZ)zqmbgfX4AH+%R1vGDrmTmI66WKv9Cn#HFs(&Ro_gto7%m@WU-}x z_T17XJ(-c-0@0L%FlZJu3O1FLYTW5crx=?b8mE_YI+j6%>UftQM1^CP58LSrd&Yyj zBya28B2ck%QxbUOegmEIJ}1Pfhiyy0#)`bRF8}NqfGSFZhb+5d{ZP%d?LTXyoY(|n z6jVC;B#j=4hTVPzj4mT^1pVLlqirFMAn1IKUA5Ur!CZw5zjuH3w5TcF2>>=H2E5rr z=mu}W<~r9-k>;EJ#FA>~OONsZ7iM8Uy|HdNDdzVsZf1iQPop=Y1V5u!Ow08e>rb6o z?p?azwALLj|LhO(pUPm~GfjT;hWlR=XT zRjEd|X#84492GW8Tz-GYzo=P9bX=?Zi6Bye^XUaby#;m@G9*XFzl8?%i)PoDg_Su20@ z1_fm;`%Ynq+{}_6($j#~{1_#_=scoE2@OS}+Aqr+i3ig7A z9;^k@i*Q0za?q>~UvwIu41~q-HimVUxpqH6PFbAk#){t8(Hx0CH!Ju@ewfttLT4wZ z7kV5Y?r5^aA;~nzi4Vt}^;pFvU;x&%QwJwUen_EI`}q4uUXw&MNAAyAHKJV{cdJL# zn&&Fvg!Nf%&B(*HB*!d+Gqk%j zd*3B6<4SoKzb`ntA!10VyI}{uT0WYze(V>cW0^iDdA0t--x@m>A2v4YbxmVtDLlXb z!79>K947&Y4{xP{LvZXWz{-CAebKzz|L-Orv#Y?ymc};e{QSpBQ!}mFk8b`?!dQ z1McmhUOuc`Qs-sN8bcc7aGu5rArCV0;*$vDW6aFSF#$UfLT<^p8>RzGqjH6KDeyWb z=8xW0pV@cuTDni2yh_Yz==Z>s@n~VAXegZPO9>_-4hz=vhoQ>eh}R73K}7*)L*1nu zmu1k;ior+p@U6g)*{y!E)oBWzw{6FKA;G!VtmW8{IZBca5h+b4lp#L#RYZ3Q8m%}D zXR|i9yhiWVnu(E6$yAl)3jV0_fEHvc)IARuJ?-lENP$H$`hOJX$;I)f0RilAsTfo~M=J{Zb~ZVd14`RBk-EOqj?vOnspUs4zDXo&e}n*V&00Czs-c#Kd5r-hB}NxAYzT@4c4gPU-pksM-n?p5|*5l-f=Tc^^!= zX~Xl3XdD1^Bt?98!gbsR!DRp&@n_VQ^vfUFQAX;RI2aYXpOj>E;4YI}Nk^+?5je^+ 
zbbC@tO1pK%X>@9S90ztOBVJU3noJ7s;+F6z0_}7sv&Rcv?fKnt*5Z~_6TwnZyJgoMQ`_o0rD9<4JvvY@hAbp(^c2Z| z7uUP*#M*p;UUdllH$X!p);7Q;qC=+u8O6dq^;rj;ry9|Ei~kf%FdAb%L4-8T$4g z?hmmatf<;lg5jzk9wV@A%3m$K^o1QGbH%BD^nDsgi-U7<&F-E2v8CyLTZ9vMs!whrAmh?fuK%NW`7NC7ItTpgE4JCSW@ z;MZs0rfV5cE+n-}PLTFR!c=m8C-#rlGVG?sl;pC88Xue7^fTLZZVmzVW+or%c=Jth z@E%;JZ)cDL;}O&u`2lS#Ke~wS-Y6^`8N`(F$nZ7YGe?C&9By|=cO_9;zH))QMmQS{ zM-s1O#Jp=_2$<=kch2_5?aBb`Lq5!x(gW`3=g=r+h!pvTy8QS1>m(qbjI!Tfdk9&R zoyFJQet`vxyg7P0IvAf0SZd!TpdhMr}u6JCd- zKG|Bdr0odxg+BX`QowleZgG_@c%B|DZnQB;8AKefl!SDE*o9+PpbLTrv&YUJ;iJXI z#R0gpu-ye*&q(SNNH7tm3VUqSzl}M6_Ji~JUC^U3GiSG(m6gT0ML~dReOc{wM@|h5#;Emo@h10_#BP`lPB&VRUxp^pmX`Z0Y=_m? zP03OSbe;h;&& z2I=4h=zU_Ry5(;EnCrKU=bP;3_&k-&-j10l=fXPA;1EYabTsHstfJJTN1H=Jf;0ED zTW>q>1(z4t+!P4}tPLX^CeR_E)ugi45(ePeMhq{a=Ez7kjZH57{nfZOS(4chet7rV z!IPv)P^lwYMK3n-Ph77#+?iKsz%g4cIH$^vpn%{e_z2kp|Gef6VVX&6S>Qe3IN?#x zd1hyiFN8iMVtp_wa*=estEIL>b?eD*TXoQNgDy3d(?SiEppr^12^HkUmwTo+xO`5( z9dPt#W`Vd41_bFm$J2X=M)R+@&Y2HXEoL2IhUFuX22NfUcCCl`mXi;_H-bZbrBWdT zKZ}z(teOOXG5#qEqZ842q)2!GFhTy0`EV#R&>xa3#5od%51Ftei@Hd#1K#Zr$ujG` zYnAf3xMfJ=>;EMGbu}B`Y8eyL)z22fv|_?!81I9X%AZ0*^1dI)n7@JOjK|KejsagGL*!uoK1LE!B>%zX-~~WMye-G?da%dd+k;Nr{@#V$sqxOt{sdJ+o>! 
z*a!HG>mgaUl^X?2|C9MxHQZqQ!tC?ku}lzc^;p$H++ha7=_+$06NEsQ>dSATU2lD*4kOjd&L{OOv;Dp)UHx#8pn*zG*cVCAg zSC=ksHw31D5TM|Sx-H)?t6S8k7l`mCO8Qc4)HI+l>KaSSZ#nWl{#ix#h{U$B7ZdJn z{KS;)XWyhn$oR2A18O-T(>1_}hD-{YVsj^6bKuqISbA?$Ddv(;`tA^KVrf_r=^)q)e{!Y1&(8lt-E|?D4~~lN zrRAXazvZ=G)#5S5JEDAo%XqL_&?hN_U1uxyHe(V8+ImU~BKc@d?dRCMGg%xmnI8y= zggi_Ph4DJgY!=@mF|9}cEPA;(GZUAjabu0Uo1g6AwlZUNcHv4qc}qhqM|7|4$XJI9 znb)nWekOM3h4zq#E4T9ytK0hq&t+kFN`k;qij2ecqKxh>Vbw)9P@8Ef4wjb9U>Oh6 zQLdx}-$%6!xWemsI6E9>HWIHF_<3@otifI2DQE5D4+doqgCfvn>@Bu%V&*Hf?-m}t zN3%L(yO+Ih=eD;RexHTQ*di2nmK;6Xb+0XL_*i@hLsS6zzViP^>1GN#cT0A{F5a!i zLK{$N-iS5#uj~?c?l7GgkqtbS3=(HnKX+i|)!9BxB8(+xBV=Tzw|uCwLVF_MnXj*C z_eb&w>NZ64+kma4fOqY}1uYK3hjhjddvR--VV-X(2zJ5j+nnL6MJ@T?a1ij(Cq-rY z=AK7>&d;W-+rBrN&!t^F8cZQLV^*_j7slM-K2NY->Yh5Pd`*m4Hd2fAk8KSZub?tO1zE`_&zKj zn`=ZULT?}T2(MNLJ~fb!Ne~|irv~R{ktB5?oK|nGx-MI^be3Tc=^x9v|q6Z6DK=w+Vr8+4DjxOeFN+qeo zLx!Yfe6nGu%L`-_B@)wEZVxuarHh6pT!|dy5<&s!R5bX(|&l+3PbN+ zEz>beP}rLIE(>1SuX%th{UVU8R2_5z>>PgaMA@FqvmkHmBS>z6WQ%foi;B_fQT@-a zgllp(+Pso+X*Y$CHT3=Ckr00@5g^j=P~LzJTDf4bs$-AOd8vQTwC3F6@Zb{WWh zkpLRdbr}P$0ijDcY|Ti3LuJs*7rC-gxlKHW$`YbsFGDQ6xP-(7$lAa|z_W?MZnm#> z{^@jC;M?VsfL?{@=aXyq`kx*SrW=%13z*)Xue`+o(Py$_mTq_Ame>z}MjEflem9cz z6A;3v=99Ojr@xtJ#sVhrm%?8;s#5Z27b@)fN8eF_Spa#a6rzlvp^wlPz)FQ;80B{T z*Y{TWR6ua<2_@-2zmq4Vu(^y`uN-K%vrk_`U)%2D<=$rOI792sI-z@efrPXmn9r9~ z;0X;pQ|$xAqETHs7VmaIdj#^N-t;?2w#XRua6;R+iYJ>0Fq~4v9Un1sfnRB6VF4Wl z(fcDIM99(r=~4c00}{D|z?Hxr(0HcK+cqHCf6Fb4X5}jQXNOC%#+SPP7yw{249e>KojEMsP84zWE8A{&jp=s+&x!XON zghqFQ7E5h5IJ6t@W$4%^4*c6V+N}DE%L|xKNMa?P;)a$1OuhlH6B!FhnR^ zf}qiw1MYu=@N{fEL7De-^p+eX%&hU$oPJGgsQA$;#V@!8$+@W@DXd}V%;Q-$12`Vt zlBYkzK|odh8fQ^X*XEBnt>ma$5IhNZOiWCVryK5g!j;$N2wCd@r8oMXt?0s!6v%Uc ze!QorM+s`talPojMhZPr=+l$SBKHsSD@nA7ILLW7y!}@n!Jg&4Y90R%gm`<~9W~w; z`BPc%K1e!EQ_s-$2pTLa#GW8q3^3w0Q2wmDSXv}Kun&?=D&HlJS--O{rQ_f>d>5(p zkCQgfb`|0lbr@8L%T`RecbwxkBhj(2+;(N&deN)f#`R#G!Oeq21PuZ^RPPo21SKck z@n`$Kr=3&4f4YEzM2!IL=M(~ZTsGjsuMc_w$iBhvHs@_PKl1`{Hy$VkpdxL9-!Z$m 
zxLCdQcU$8K1T%&r=s1`$aV0Z#X@5Tvx>bE`cI8y>YUaHT6amWK8y!QLJdbhqoifO~ z>8nzQjnA|G-1~&kcj(CoYD58z22R(O3a3p;JHD|M&g>!yi?8=9oG!-#?un@-GGu#L?IW17z9c*P94XDzV1DG$`L~T zAM5M?9{}gly`(Lz+VxdSmg=q1mja~)nfcJ2-L!LaG=%?FxBaeKxQ-5y(jj&cu-O=L zsnhsT=hhwm1{U4z-+j_CfZTpjCoE|C$T8{J^L3Ga-fk*5XGW4#F0j$cdf7NVeocr`>oAiX{2<^Z4sJ;jf7G$12|X&JktNTp;T_cr+z>&53q zP!fK=Au|}M@!B3w+H=>Ih~85F|DAj`0Z4A|xLQmSO$N>RwdS?0{-{gVj12Vok*zyd z*c65R=QGKqZ%Wy?z(eMVCwBO;>q2MRN0=Yb*T>ywdHh>HpTK6x=Nux`XyHn~NQCyb zTu_R*jah*4cFdq9ItnCV|2e04%&<{DK>{DlxtM@2r0aaJ&x`uTfpFHH!lhs^ z6q;rTniP@7fMzHH_VZiI+6D%3B?W&4A93fddxAMCl@I~Q2{FbsV>>`I{{d8I=!CeL z{Bs)D+zR4jgsIg9K)51T3$STYRqE2q>hlvD%C>7l{Rdaf?WgVL;1mo)G3(+Vm-b}c zR(|A59IJcwk+o3hy+$i0=59Zqbi^#|l3zw)K2_W%wGnvN;b3;-{bn<5vd!K(Y$tv) zjPw(!=ckZw``B8AQmX9e%iY}9VGn3~@1PBP42UT4fg#)t%HhhIV`Xa&nW3!5WH@nd zZa>Mw02O0mn0Yl&x_0Q7z{PP7+CHh5Cyw&(4hV`>48=38?rdg5-r;Y)ghr6WxbOA| zY=4vIoPBiuTkF+J5)u-0lxhfMj_4{_0Sl7LMuBBoZf-7lN{flHF$xM$&IjZF)`z%lIAp0M@bQ-(dg1jq{c_Y};Wp~>U?<)9 z+}0nyBb1{A8vy_I#w>W7WAqiR@;<3u)oM278!nIA-HI*Xh8e=VR(%^Gov~RjM{Hus z(;POqE~{D=fBLqze#o|ZHCFqJN)%Ce`8U>xM$07Zx8&Dg=;czQ@<}x`;s#)wDkAgr zedrQuhIVaj5%M#H&ujy+_tq0JIO{P{0P0$ZNM&p%SE%3S0i+(~Sw=<%dBRmvQq*dm z7}WpXql)evC@Nv}I$jzmUChy<9L#i0_f2g+JfK1~K_)N`4N)B|hC!IpXPxmm<9hT* zX)NtOPLH6X(KC?;ON*1&3`+R+>JzQw_a}r?T_fO!3=Gi-GW33Ss{GB2^iI6lrvF{* zv9_Dgi4iBh3pgI5*(^JUIyQ8aH|+hiV%Nz~XkT2JW$|_H_x~{~O2;9vGn_@pV1Ax> zsXlAFr-{Mjj>FRPTop)CD(c-Ki#^$2|ontNpQ z?oEuKbv99uY`4Ih3H4L@;RRJ9Q6ar&Uwt{HA0+KgKU9!LQ-6uI>;21Ax(0o0u(5kG zLcc8s@oF&mlyvN+@3)9N>a#F$n%zWNGmf|c**xAcd%O+Zqc&cRHUkAwE_qZa;tXyw zbzWqsu?C6#-|@~idJm?Kb;>K4MNUkZv02H*G+JQLSa+KTjpr%_Nk*0)1W}ngeHR_i zRc8^|=>*pp)CA*`dSwtfg%Eo|$|40FF`GRm`FL77x(gsSu9$2f#zsfdct8M+K$a^i z2CCK&dfR`xeroTnCT6q06E;T;7_cD`BwG;1XgpGCgzPItPTYio?-8F}=Y$_m*i`XU z(;eb-?Ap1ZR-DLRc;eMR$6hlv^I?mKa^aUu zQesy&8_^8vMr35rBLwH$4nNR9Lq0rzdIu;5h?NuR3B!6vBFdl^`UI&oA#dK$f>3(E z_7pr-kf}n?%33hifMgb?*nYeO<|i4Dm^mOK{y+-bPhcZ9;z`>2!dh~hk@vrcD4NxA zWKg4~ZpZKWjZox6EUqc0#Zqi|_q?;eJmN0EbesUDv%jJ&LhO1Oyp|y`gG$iHINw-} z!LcR8Pi 
zd=>5~9x9x%M#Q)zo(xl;M@$7YS`q|n{{yw9$|nbp1yY)DH+(+Dx%z9Ak7IL{B4B*{ zprzQ9$TerBk9C5o#aKl5w%K=LEp09@CuSHJtgQ^Af1Iu7p51bMHJ5JNaNE6x5-ce4vyj>2#YgAi5YT&VE|~n;s8|y#$T`5>>zeGT*6VRhBqJjXmYDNL8p;LkI#OULXmB8Y>W#1e8er z$^U`l!Fhzlt)T|RGF}C?a{KYML798dB$}C??VS!+zKf3r#w-T9J{&}#VHHDacJDZk z5{xqZuQ}Tq*gO z%eMa6g+KSYhXDTpYy2EKf5%dvM> z(QVk2%AN$?p^AbJ2U@eI4scMQ&?a0Wqg)7qDh&*Km?(g))f|7M8<(7{v&sYU9*8{* zp0h*&J8DQMTZ=OJSX(=nm0-{eSP=EakC#c=kX+f~r0u+%^kx|1FGN}Yz1*#nQlLf%*IK*c`@da7<~5wRj87N!}%WummF+Vjm7f z!w*gSZ|Ds4mUL1+#Vuv!mx1#(W_htr*jQEE*S&K+$83o_!*;-ap1Tq8%-TDd;>qLm3ULN3ylm1L8 zRTOJvEn_Fb%Sh(KAfo&|!u*b;{%R&CLFFA7h>*4^mhdNka>{$efS`SHG2jC4hK?uk zM%6@X8Zzs^{)0H-1{_WOt^nLfoTC%}P2WuP7I(F<`{PP;G%XV@-R=3W4{yhrh<(w? zO+clYB!4N67qoZ&Pv=s@2E3}2s=S%|@Z3KBKzky*Mp|9#4h=8w$%VwBo~ue%u1I=+ ziX&>|ZeVe?002kLGmiB2E$LSGTAk>JK}R^XiJhmvQ>7*OSJ# z?zfTwiB3uQgZK%9J<8C}^`I-L;NNYFXc$Heyt$vRXc8goVk(B${gyH1SZxplxm`c7 zXA@KLR~z+A?cy~w=M?kzteFb%f*JR&u;bP^({Z~wP30AumzQOPUk`mBnIMETFHivD zydMq6r4sryK0aQ>cJt;XL?jmz69cWA0EfBuS->X_A_Pid2E)qj+cen!; z-xA5ssW-3fK$`+(vO#DAky_XEy=+9aEOM;%tG}WH1+**K1+OXUN~8dDc1Og7V3C?0 zeEb>A!n#kd6*&1y0Z##C%Vom=__vqj9_~dZ3#&Tcj?)*cmniNHcf*@EfpX6o+d1uE zT2rN!wbp5SwT-9tV~$)PlSxbe)U-=_7%vjaf#gX-$QcSstQ-^d(?cVKYbU8tGAa_8<`$_-sa?yPBKvv@8Q^{w{h zo%{0mXwaAxN9IF{nzr0C$0sjtXDXH#EGElssr6q9$6GSd5=+VH__>?q%2z{6`$>Ol z?IG1&pt;m@(uk#hc%3X+gugYF$i;}}z)}&2IFL3Pj%|Z8upna!dR6d_XMm6fiE>V32Wsyas|%WFXzMaCp| ztwk(+%Wv1W`s3k|ys-)!@*(2=NtzuX; zw6CC*p|a|h!G^xNgZpMc)xai6HL7LTPO;%gh_^xHVR)mhBPI$Ks=b}tBeaj5$8svf zW%KqIh7jpo!91|2n66ApO)|;-{D=h4!X*o2BXiJH@P}j)IXP@Z#AuQ3fdRovNS*@7 zwQRf z!fJjz7jyLK=$9ubc&{VZ6&ZS%S%bkElL9^DS!{wK29sznh6=(AoqBi*UuDGvfD}(AFia9n+ z7``YW=cQ+mj0lN*s3?F*LGGB4$D84Me&!12THkvMCB&TqRNEMOo_~*}TbFy_&K zImNnH3V$#UBouf&0wxF!j`R4c!BXCfgcM5T$^)Lc;oF3M8pPYW$+#sOpIi3B%(x!5 zlJ84O57fbyUD7%cfS*;OfBL=9=5~f+JhM8Heqm=5#qw%d!%P&S1XzR58d2Y@WNq3@ zCg|AI=w!D7g-lIL#c{TYPz8-F#q4zZ=!rcPC#B*~D0j_*WD0R2! 
z(WFfD7CmmKK+3cw4yL%-_X{WxP(NI)@9caF@?BDR$^X7DL1lPf-ut5#iGoh%PSk!& z$)P<$2uJcL`-_Lc*Eoxr?`-@`1OHt@Og3;Ns^5j-)mf5pOW0NY=x9}~s}i`{XrU=i zjMTyGUH=0qe&E#%b{8@Okm8t?^|Bu5C`#^L-!^EEyurU(3hvF9PdMdZo5ey=HvEQg zkIGow@V*oUT6Q7yYF?G|PdK8(68LDfX3(j+9j#%cb$L(qNAappe$i^-)7!d!ZO6L> zgRiN`!&Rtm5i(ldfu7;5_QRGQU#~nP&GmfL|&W$RRB6x=jL#gOs%0ZRVry!xxLJ1f^;hOw~0o`SVW-6`W}wD)QN zC_H&NT5RSr;bjF#=BV|`aA|0)g3-5U2$9MPj zZwI#_NNCJaDTxA-f?k4zvPo%H!^Q&;Be0A3(rgsR&Yl@OwO6Zd@92B|s#3f9dWnwD z#*Dba{jGS~zuyuBFgQf;N=W0VkA|0K>F`SCe!1$@+#$i>mT)73n#_mQi!_Lcs+_!D|4R-?e!j&_4Dkr1R4=J<-*$@MQ7 ze&a`*vJ~C}yUQghoQOJ@6Kz@lqJoL1^4xvQqXqLq5f%Re%sxW2ne>eYtU@C>E{h7` z$}8uS(JvY!FYMnxM*&3)>s`=%{P#*|4Mekak?UDFq%~o6RAh49Gt9yRqE)~LzaFfJ zg+-2z$;p$g);Ho62U;4sPoW-|h&!o-JOTOtfRJc6*Ecm!xpuFgyj4oLq0xfGJ3thU zYo&q--OvI})D7@jx==fQu$dv*UZCY0J(BTmOgkC!iR5ee11G-hY#Jtz$y8w?k&|RL2VO|9jTO-x~-L-;Lh= zq87%&r#17Uo4u2pWPI>NoG4rc*r56O4b94I>?oJ37My#V3z0L88;-jwUEFNhzwPTm zS4an{-_^1M@jzC$6&=Gi)+4but*RVA%u1oWg=NHYd|4k;jtB+^jKG(&T>a?P_9jS+ z?a$aqglG>B`t-#vJ}Sgz`(Fu7E!Sl zXsB)Zz1MF2`j=6rCj$?3-Rb@~Xf~d}Mm361!eVOBM}1SAgzqm$0B&IR-Bv8KHY*br z>c_xb?t?(uiT*as4@~Y-OeLbGoG44-nm`7b`7A|msp)fzYD@W$-L=v?jMR?U+E2sz zY}r(u2OW`J+xmSgZeVt!feH}3{~1HQrhHNs{lg!<9cRRW)d;*mm~(Hq_V_IK&^tQs ztMvfy#~#{=d=J)VtRF0{Y#$COkah2GKD^HCKDYh+Imb4oUdW)O&98(u<7gq#zhB1S zD<}pui}gYPr!YQlPtCNj$Jlb^7bdZRlVJXHtBFfvs}%qj#nW zBtwte{|H12zRo?69w5HT>bT|k+tWL(_QO!S3c!1TWif2!yCHY&jF%SQhn{8nYCZ{1 z_sRlZW%E(aFF6*jnSS=9zNEEAybsP>&EKW`M#4d8`sX!pA6J<77D&yI{83vU%T-F=@-$Tr!?C@FMCUZTjpYril&P+yEE0zm*L82o6oS-Gk&E(1Wnk-qPw~>{C(p+du9C2ME-zd2tXiu42sbq2P$M4guVq#9807$M9*nV_C=B zOOF~RPdQbsfcYqwQ?DgpFYLwBR3?@hC1ENznWjN$EdiDS~KL zLGXN6dK@cjwqRk&NxXD8s|U0ctEptm??I3?2k)v$Ec;#Efb%l3UXTblXRdpzeiNnc z3bzNokZw+1|NBNQf`WTGJ&snggZ1GL))?pVn&!!X*9V^sKfS`!d{L{U@KWAX0ZPUjf;2LX}lIz`_dI z(^GfX|I8E1{NsgFg>WJbEn^2pJo%y9nW$ zMGhX&u-w3=_M~;MyZe)huILgzn`!Q2ql1q1mzSUxXLsKWF2=)xsSMM_sq&)2UK}EUAC)$h1D;JBu0F^c3F3 zIp@O-W!#q()(=)QNp!b9H=sJ>3>$i5iuo_CxFpkOkaDQch9yWc&n(m}405NKb`;LH 
zX%B6C9Nw&;?(V(wO7b`TU~G`L)y)k&3O`%XOX&|ng6mQlWk|W{gHU8EK9H8d*#K3|H z`XAnJ+g2|VD)1A0wd-!**6YY)rS~B|bk-yGdx$ljiC7lkB$8q_k-`4^JX1V;G3hY+ z(Zxz?J+w1KwqoXeVZ`H{h~|+u1_om8(iVY@PnPZyN z$z-_3ZT+O#w}rvMz4R1 zN(l#9RS2QMIp0;`@03n>eVtp&)sIP=JUcPrTa={q&&pGApx`yiVAIXV;DH!_Sj(*J=u~% z28v$$YZA)JQ7!zhea{5p7h5m{MF+dtbf9A8ywAU&+41zI5Nh0;Pq}Y4(&fX+Q$fgj z-;dfSqw@aEDq8VX&ohJC8%Coc##e#|-)X$b4|&g+@smxGBv^nb`^WO-N17dlF;wqk zu9`gQNZN|F5=qpbT+j}Udcq;iIX#sAARxHz8AQ0BmC_}?F2gv(rporm{iOK&&~sBD zuP7Q2D&Iop^(B$Mc&BA}&HW3Y}eD zEfDZh>bYlWJz9W|^;2ym>B{?Ly94S^W-tUT zmG&qKO!_0^9HrNy2(b_G_QU!bIvCGjO}xw6(0O+4s)8V{13Uqf-YdRpadodWZ%TFa zC1au=n5+K~&nCd+iC7|k+jQvq6szB<;Ss&#*S=@H?l-*2NEdAjUS=iXxbyCLb=&Mc z(-NCq{c={md&prYU|q_fz8rL0F*ipIT41W?0?k^*E;VctM1ji$E&UP{IB9zNn)OuMSg!qL@}mPOsCAzC7iD9%dLLXIN@H~jwjx0lt-eD@RqW6st9Tu9Ns;g*WS@p1 zKVlC;+_3O?Y?b()UPfFa5DSHvh!x-|MBw!mI>Vbje~-%@Q5WjATKDZAds>##@b+z^ zPE9Q=5X_5mp86qtj`?Ab>X_y>)l_cQRSJ%7#6JTl%)u`B|3$;ft1&0AIdt3Syw4+a zl|h7r4}&0I9M9}tx@mdni2K`bLtmf6T?*84^{gC1`G8{iv-aJ?n0hyxCz(i z-0$kRSex`4{o1y9Q7d`u#l?Fd8{@xlR9jl`OToojqPHZGM7T@>gC?ZY>)~AreqyTj zsvkt|bU$&W^Iqr`4}ZU2X7th`VQVm&K2}#c=&-2Br;oka3p0R8w)y%o_Et+CVNkUk z$4q^Hv_YDed^Wf+;FADr7_RuBdPIR{Q8R?KmcexaiIW0VAeo>eBV=hsf&zr@R&1O5 z-%H+9VK4fn?QMZ}q7`7O``-v9gCs7{*i*&Bm?oS?j2am;Kr5LWAQ@j7N1OFn?%lTQ zb`3u6(T>ov^yjn4~^iCG)F>ge5Q6(Nm+w$8efs*T~A-PX(q6<#h z?%{6;o>G5VYhO96`$e*pA1NcOAB*|kDH3AgyC`4Fag@fvHG;~W(b)9;>MwEW4OyO_ zhEJw&=N}eej5hX+KTqPGqaz~ExuF)rc;cRq_N0w8$h||E@7{+!n^+V6SoEN<(!Vph>)x9;ez|NbWd6kFh_g4eUU# zgnrkq`5RV*YP)0;JGuRwyI(~R!&ozNkw7Kxu?zmx{pw*l^LT4_ZVzsjb(a*>{JaRf zjR@%o6YhonP-1IV!R5CgbSz5*bZOUg03PqK=2Zq1@q9s=arp;)V-9XPTE>s=Wl6C~ z=X|<2iAuNoqdCVel4LGy`ZAR<&xONA2#F*vIZ)PAe+02;?ywags0Fes7~bcn2Qbj| zaOVH}Ojv%_IcZ&e@mvU z8s#RkpC2$jbfo$rBoT0o`ddPcxk>rwq!^~-BmHUKH1B7vy^bgGMjcbuDc|+lM@cWA z)ix4xH#mmNDbw{TP2n%cBs>mw!l`5%_WbI6my!8szdf}4;$RuE6SaZxIgSA~^)%bZ zD+7~(`L*NrP?*3_onG_WT2ps2XSzi$2l=j#Q*UZSoEfGHy`~WRE2+RA z-;BwuaeO0Ebu2q}i0+M)R1bjVQ9(iZO?f7M@2Gpw-e+@&99<&ztxCY1JXPc(Q=snr 
z^3X|ecx2tRb79KuPSNlxH$;yq<^1(yNZaN*Nk|$S)5FmUz=HmK4OT|xk76=;v{==^ zKG4UTd;^(^pn6DKtxEIs86>&|vrUc!wU$yjS8v_&t~@dc*4!=oPs#wbxf6TMgCRol zlg%Z|Um?oI^7-f;IQ%&-JYUuhcd7}}q=>7T9t6c1H&Z7&d#6pgyLibuDDM5ZEwjgpfFddFz9X8534t3KimolGxop!(aOQ9?#o4l zF#hMIl6IL;RaF%Q1(f}>R{1HfuIqo$Y8VitV`R*{`91Ib$N495Yll2uysFAraVt9! zl0^yu*WcO7q%v2~sWEb@y*_ZodRE0PE?0(L@HO^_adg|0wB4*vZECI)Q<^~pA){=gUZ0BLX0H^)MWc|D9QI3!i z4NnYKoZ~3f=*#a+14d~Yi2hSvE8E;U_#5?QlB=Y?BX+8 zALSuqp%P{uk-l_+d`f|dei)R`hhELZb%}M=Lgq1rusaYmEN`(nJ8o|Ba53jzk5Lr7 z&ttIh%f6h4N;foeF8)(7b$58M<-1nR=bJljbJ_u`#aQc=D%Aqk_>!>^kI3Grec5_k zLTfGZDDW2{YkS4+RAZ29j>e2#ic_kPkjNNsA8RPVS`47>TxVdX?JOr2;;0~*E#$ro ze?5a7iT`mk23&udB6O_((bNWFbx!nOLgFfl_`Q39T?+|6;MV%*-D08sKCF6v68a(I zUUu`G$~%Iu#%TF~3F%{(3>$qR#a;GJ>{ou*X5JwRiKn!sNFo)1N9{Ojt%=+cN|1K> z&8v1fC0ok(Dj5J;Of5<&>e%5lnd~$uW zTxxIcjOe7$B78K0TNIt$e~BLz+GKZ z+9M0k?O~h>@!a5(G67;1b@xvDum(+}41?|S2ifM_sC-yys)3iNPPj7Ss6|(jbF84A zBLG&9$@mey^sr%$G`(fu01wlYtT)y{dU~W>zaG_jV+Bp$Lz*znWP3dG#dO?{kHc|S zUg87<6_~VDVMF_m$~S^?VY^9?-mypNZCmS%sCIl~fzVHe;hBxT3rlb92A3LZ<1`5@ zYi23~FpE<;x#pJl9_CgEkurqa&b=6YAzA;!`@m)j_e7`5*k2yre#bYsEYNyCzG(3e zX**CsUMK}fJXD=Z%Y~sv5C;be6^@G@%@ZMVO;Es=ONsFC5Q6kOpv@Qw8^BFj-Cwn2$16*Fz~Z9UmO`%9mwAD2Wp2B#Hwe}aSkE2Ffu?X{WV3+rcxlk0Qi z7$xnK0qLS45u&BGL`)}}hID66rKO{Km|4WSTk{{;Vw0(Msp8*sOa}#d-8Bl(C)~D~ zdg%#&u#QT=Il&*eY!|@*z3)u^_v$v#NxtSV1EJp4bq|?C>A!vd9vvT#p+el>=_@*0P0_)D z=etsQ`foiLuHv*Mw6cVnuWuHejWNrOxRy~K^RhVTQqWup|1LF)F7Q4icX^U}fT%Hn zX;!9XYGuU3g#Tw&N$cvinn+@j7mWDu0`NMw*p8$WR6RWhhqUYJ zA^UUq=3O@UDf8|(g>Mlot-P99j?E3|Dky!olut1#uF>@)E=RtMw~}yf>^Q@!x}8<$ zj8NsfC$}s8%lI7H&#`J$$Ni;OeZ0qi?7yK#+8$4_*|TM?w0x^eL-tySWM!os>46_v zNjuw$Q@E{Gk`9kX3=>xUSx)iue_55M?whF_Yw6tG8xFW9j2x$h5^tkCX7`i~0&tWH zyQB>Q=J@m?@8e9>u%oin9L(2!n>qPltY65rR%>*buo$csZR?GTe|MwKT0X$241Wir z^9e-UB5^Wj;G`b*@&!S55Toqyi2&5MD|?IGNl*tOjH!g<%h=e>GBiH^z@dh+FEcA^ z7GRIg55cMe8phgvkDjaz?wJD_thFXF8^IO(& zCZ7Brqby-}#$NI{TMQ2A=byv^V?El5wIs(~y;8B#_hZn6rM*EG&a%d=Je7qPj>dtz4-I_xb19RiYq zTM7UBJ>z`e_q*1dwa%JZ=gfZhexK*Que{4~JDNVw{(h)FYX5|D)8MNCJpYu!hTxN~ 
zwv4~mXNj+K6&LLsyE@Wtk@dSupNMiGoa!0XU4QzUlVq!<&vCmvX6YIlRFX)60lZxr!Mvd|5PEGt#uhOQjHFRCN*G(RC{ zAQ6r~=y+V!#*15q|D-ZLN63D;C3X@uYtVx?6vw8X{VcpBiCP)|Gx=FOJ$vz8ej{9s z4?=^Vl_f;C%Z ziPW_{r2`7(hBO#VRIpFl;jEDU++lm1ahr`woN%`u_qI)dzih&)2OmdPAC}8L?XEMpPBj;2}%)R9|QFD+R z+NkU}CaxcP_ZtO@{WV>R;h0s<6Ct?CiR+jo8fcD489Of(%Lb`8&hZD_`rz(4(Zi%d zztgefq?)Yc!YdIQ_f(ZhzO!?#8~}zZs~AnK#FIwwd;UFlAJxJ;uy%;4MGZiDERKOC znS=M2i&%{vH80*|oQV%Y<^E`^=HB;h>Taf=-y|foze>McAR#V8PaEgTxW}u~Uq`+Y z`ms*T_(_5Mi7400hoL)tckV;jIIBu*$? z<@+QPX7MLm8JV)X6k@^{q)3MPM@aYPxFK!S7q{p82@`vzHfx2r2r{XATAZKkyU{*9 zDMCr#8{XM9X>skUM^)=SaWD4I6H7y-p)59au?HG`nTLv1+}VF9Z^H%zSOW7!j+wct zXi6<9eyi-i2h_(+5=_QAA1!}NGWO<1cI>8c9{Oc;nyWJORB_aQUgd2q`+Y2=xanTM z(k7l|MV?dqr!>aVfcP@X%Q5k>ZyD4xt*)+4_+2fg`wu*jq1oB}1rdh^E7u`my1!oy zK_~sBu@4^@lfq@HlvF;)%qtgU_$s8b^K|_sdm}@OMtBtcxe({A1h0+3QD!BAy3S)u zg(8EnF>XJvQ%jc8CiRZT2mU6cpQ#v1<8$6<_F-o@A6V}wjj$WyE1pZ*XnzxrFg(nP zjxUT8389aT+{%aL2i_DnCgkJ$I-_NR3fPG zUCSDXU)L`Fk8;+^?@$?n{)W#wO@-@f81CQMIg{w_l;ROXmyAMqk=4Ut65%@H7L4&m zi>98#SNLnw8K%J<2Yz#(X0RpJ;`}dVl(3Uxq9cRfbBb?j!W%|?X{+bqUt8}BO6A8O z=$Cm#2P+9HNOVo7b%p8UFmYo%>a7fSlrXsiq_UZHkKPuc#Nv%!O7zrSr{H+{Ls@pg zn<4PoEF5i*UgGp{G${tSHZ!!m@M8GzL0J8n;6n$Q&v^p>0;zOlPfwVgi0sy9l=b+L z@oM)|4S(8c|3u_%?4@j>rq8FX?D;&q__SsV-iuNMvqU`JwMlt)v@&n zh*l2@GWHTYo4y)sil$gZAgRX#zbO}3ssqncBNlFRXXh#b(yk>iwV2Z#kxxqJ42+6N zG_OZN#M{ywcw0Hbez--Z%xW2@*HDNIn0FZ}t$z3}FHMr~x`T!+9!AT%N`Y-B3<4@> zK^D(GxD|}L;RSz|2WoP0qafDoIU6qHcfR(7Vh#Fbr-q=P4->k9`hd^%=Hr9zrf+4g zQv*I`l&m81*;T4XD7luSRFn)#20a zglOn!XzEJx`Zh?xP(VO{pzER>3bjAbM5~oCzgb9)9!So*$)r)}K+5W*usxh2PR^)r ztn^0gZV3Hr3hJq^RCl|C&4oDSm5!&(sp6qX zFYXQAWT>};)(jjxJOz;Ljg}b+aHf0}Zo&Br6Ezf@W8wPx`l6=VE}nuDlK?IUD3PFuE)S7m)x|uvwQ*{RA;Tl|?`>m)@%hNw-6WR(HOTwN}n5qdG$hwC)nc#p3;V z+6bjMt!HegGCSCzi{o*^HE1ar_!X=zC!a?ipnnzK6C#-$nrA*o4hLX;ZqrK|CPiys zpIT{1KghlLQ43-Uf~%MxNj`qsl|Qq-Y~xrSCr9--8yLVt8ICcGxR1vUkFGuD<@kM~ z;^6hqazJ60Sk3m)RR6*1PzUOpC+?Gxf0mbWgZFj)jg}^!y--Gtp!Ws)WuTpT*kWGp z{pLJfdXJt*yJPMLg_udkuDfr^C!!JMgue9r+*?>&P+^AM8DZ@-ZMnlQ9ZC`YSgpKY 
zvB);K@^%EZ7cso0J!k+S)we5z9Ui*F6T<%UYa~!dS9i&s-onD-?6@+p2f8uwTE0qt zh1|&h{aMWge2JWb0^y<6w6vbG84$GRh`C`PgvZ0S^Kg?I`z6Sug$1Wr6_}70GM@5HO_WN%k7^yNgcm+jFE0#)erZs> zW@Mw%t=62^aQTF%@#Vqc^7OA|Mk*Re9h)rkXX~oiQ1& zz;CIMhIZxQTBuHo&)rZ2vuE7pbB+G<*qB;{&9Ak|ytR&;(f&g$i;Jl|gz7#?44T1W1wR3n62KXpfeo zu2tQi6+yl(adC0PBXWydImn-f zoJw?aE8n18fACc6tv@FZ%U+`Ohpy%yDaIC$a3~6)+iS0szxzQ&b2+uudBg4I7S;u> zi%*L>vG+DMf@Aq>B1HmHj(9k-AC>i5hgn_Z9OxCB0PD2l6+_Zr2jo9u@ZJ1WgpWJ` zh+{@PpC}{|%JbW8ghNB#VyI^wZSZhF+UOt#psINMugR;>wB(5Xb)EB6Cm3lFUz4k= zAetpa9zZE_Z{D9X2ViXtsW{OLv`L5ILETK{MSh)ze?AWI+_TM|dR}6Xk2mrIzjus| z>~k)UqI}ViRRhpM$JXK}4u;F`KDSa67aukHW=Hi)(|ZY}U%i12IABRU#Vn8D>GBlH zqN%TdxVen4kw@`Naw&uhJ7Q44us-SK z(@VLEYLUVfJ$-j<={aj`q#~=RrfQSI}8>42k`v}?(VGaC1vyfDqki4s%{bW zPfZ6*kzUmHJzs-^FX-WDpdEx5#r;8nwtNs`BaJO!rjFbG>ORPa{16~GKZt-<#1cqZ zBmp!_@Xp_nsI*MNM-60x@@-GffllPlfc!}`r2NEv1`4k|p_UaqhF;QO+eC_d8daSR zKIQAct%hCx>6hmnE0XGEPh9s8F8n8QN<%#pqw>^O7az@gygy@s+PW`_S103vdJy#_ zR!E80W_`#f6Ku1_1*BHL;;@1JQuz**KVgF|2m z+$b1j7$5%_6NV2n=2VAJ`!FX#6x3(TtA8UvbQMcHA~2!A69&NIM-Uy4#w0j;9QP7} zdX0Hg8yQj~OH6e8v5@yZ>xI{}XTcTt7qMzu-u18KohM$3VO0HXj6vSr$S`B%i zpSm*|1YS%;uU(6_s=L|GQ@^yt5R7y1mWu{$(bACJ{?D_}GG~t6J({3CKP+FiV)rK8 zKSN)`*nQh3#qJJuFJ>Kh{xHVmd7LHIL_2kHeu|Xp24BK%Y#GXjmP&g1I1u9tTigFV zrY91&=9tmtY8yG-zXvlx1ORD zOx(aG6u}|ykd2Nk{989&q!)ImR&-drP+9ui{(xaPy5xYqQmv=$(-VL=n5b;2sU~Pg z`ZV{qti$kPA}a|gI-M%H{XTNnSYI$)iq@*Yo|0aJ9r=c+eV2=mOPs62HCd@~f%gM{ zTFKc-U`OtwzxEYbp7>)(9(?MvQjc)hPYCk|po<|N5s%s=e~khwt`tJ5!f~@ffv> z6(>qyI66yj!^R%GOXR}g)QMnEVPzPZBGsmEV@N3(`4kGq+yq-xUs9seG(^00_pWG< zEuL~H__T$p??`{wnl8*)I}k&1OaG=1J@BzHlaw^u~69Kqyf-^vK6UA6n`1 z^Jjc|aIRcYQdd9XJo&E**^iP9u^!&A|5!5lHb>Iu^G4#Tl>_&UyS>WQrNr{~D?<`2 zKDBuI1t1^kT|2$bUA^wY1+uyLS<_Q%xokqbt4*JXeVeem!k6W~V7Um0o6t+@JVF06 zaDf#VNAtbo;~Xmvh6+|)1^ajE*yptxg3gXpA zOfEPG)F9)o^?YrTG((evlf15ty+DW4Qbh94?%%gG$*0svUg7p_QgBH*wY(9Zn`U(Y zU|X2K#+VmyoYHl4Ll*ah0S;@M+u}i`lplF+rCB-qKlDoYPm&Ak6D@w9 z$Y0K@>7S)Xo$;>T&?tWtaGxW(B2&g%?hWA|x~pThzNX5wtwRe@e{9b~+{aekb2^^T 
zA1Lh5tBr;UtN(XfEAhz#HQD-mt1SFuXmh%T&f+lZ@2?vlo25 zW<~4t43CzQj$3J)iGss(Uol%YCNQ!&rA;BBo{X_OwkEGLzO*=)T@5wc!;Y#L#~-rK zA5YQrVD3zQqVoaeduzxjzZ2Ug#LwERV{cLj&_PMX-%Iuj?}lFO3zU6~F6xV+w+2!4 zz4=Q^d+d6`tEsd5-7}i2MiS;zKcYV^f1hQ^PZk{-@iQ8@_wlC1Q*w{o!8Nxnon~r( z>~NHi)3bohw@_1yOfGdZfF9h4`(Ls&GFl6VODt&RihQ5BG~u}dhp&^o?~oTi1x<^qRkM+wSj&?s{AGybe{;q zpge_*Z?$uVa33w|knnmBFw4Atu^?O==A`jdg*MkXE$YqnEXC%V=`?$B(P1iMsEv+) z^ke`W{Vo_coZ@+1_=4P*2M`$y3jA^@7LZiH{8aa~qJf<7@A9B44UN?-h2d1CMvF_i z^6zyVd$(>Bsqs`xlzVUvMuLv!iq@!&Z6Q(Zhjroi>Jy}z_>AF~?9``C{C=qqx7ck! zw=Cru*P$Viy`iK%qy&QOF#ik#&C=-G)!Ipw!nB9ZL=GyR_Y2uN32`+#8tw!^Y%S=x z1dVTw%@?O&zb%v0eO9&VNP@>rkNx|!Z%dHr)?UFIx|H;tNACA2Z^zJATFqQ8+h0lt zpfk>Y{`Ngl1QY@1p11K`%^`HOl~BKxjC3~~gWtkV{Q5QJck2Bl{^kX+IAQRrF|i!p zE)SgmiVla+A**WFj0-O=UUcGqVHn@Q+H{s~lm#aG{w*P z56g=t4@PQ43e&W$Yp#w8yiHj#RxVX^x%c%JRHu?u;O%SPR+>t(uN5;!NhYB~bJ@eN!{FDMcw-k)>!c+!B=Wcjz!kqknK#!n=2~rYYAs};aXvCyl zj<5nuG3|RC4=>9T34;(H@AZ`(gG=N}t0wXqjLq{~^8Kz2#%f_MRTphif71Mwd#kL2Zb-=b2&>N{ zqS7?}n>0p2qJJyrm@UP@TrY3-M~`NPs6&IraksHJiOGrA(Vm*Jr$QI0hjG9IGCd`_ zAPE#m^?VfGgY)-_R?>>Z!UeBU5NPw23W*RR005c(yPgXDWmN^8EPhOqERCrkeLQ$H z;TT~IJ4ofL0DN61hPIl==QD+Q7LpOh?}~61XubV8Sy>#oUP(1SZzZM0BkH>7+e^iP zhSKjr<>BwnhtzLnP~1$QlI&i+jy~>>$s#xY=q~o0UG>fMIv8AQvR8w{Ex>g%>RP|y%uRaM2&|6gSY z4IVxeVPqBvwqV@wyUYw4OJN6%Btz>S6yXzM%#nI$4i<;&^5KDXjf{G?TJS^5-$-1j zuV4Q0LtV)y&kzw-L$>G*7L~=E9|nRqKX0LYel-ynAcl0u-o?W(+UzdSj}|geKa^D&v}sH8i4Uw{u}nM*<3vJP4?) zM3U#@(xA=me}9Ba)&m{lBTKNH$zJx>oGZE(jE0EFZ_!NM@ouKBzR>s)HzvPi-H6TN z6wskzaaJ~Y33c>Kpfqp!onrUSYGrlv_z{lIrgZkw@zXL4L17nSSR2dZP1tBDS6w>r zg?!}roKeO3QD(1ev#Q@($^uvA_3w1t6%MHSTz7^rf1E=2xA{xevg47tNu~)z@8DXf z_gS^+^6d*D&bi&LYEwAM|9;*1c_Um2iy#=DHVx=pbB9~7kL5wfr6omQ8K=uo#o;&a z1v|H$9XoV0B7K60tQD%(1qc~sVxekFR6h0Z8>nyL#jl1qFv4)jkmgRL5FHQ8nKm*~ z3Ji(04ZQ?^USixpSVoJ>RjhRwE3>BsdL=U}q0~xzx)yEkP#&x0D3;T-E#vq)>dIo? 
zu84Ane_|E};hjyg{n#=5X?(Jp?xs-c#$IQrm10)UPr0Gl>S+Cdw0eB@6OCd!Jo)(9 zP(Mp0NUp?jC$gPFVam%bP>=YiBm$7Q!`BX?+y4QHzh1l;Wp7@3l~fAPj>!R^a_vE= z@>}>VYwBoA(Ml68l?Gjw8!V=hyNANRdG6|D^=}4TPrUy73=4uJ*R^L5U9M zJ}}`{6D7Jh(Ds68q+vUzR903d$Y3NPnGA(9`0>f1ocx0Zf(Z3Nt^dOO=Rk3diH3Z1C^&jbFX_z$ zql?AkV+tVugOKK0=!}yCRS*E<*dKGHX(hdPg4ldF2p}<<)?n6BICqmaRxEJph?BAZ zpJfZl!>S82E=|Mgs2cdlo%$Cjqq0-x#ls($MIzI4nyHIKHmr%@{x9Gz7M(Sod(W`)d%nOb`1-%=4Uv%|Bz*U)9i=f*zJ`{vc z0^edXe&=zG#6>e_q>Ntziu<8HKCE457F$H=etxZk806x%?`2&C4Sk+m^$9aA=F9aiW0il+X($px2TAaga#A5J+V4Mkw`l@#`>@JNu@qrhq5qIxVRQZ1!PgfGn zM1S)A0B)!Vg+FpmaT4asY~AD$W$AK_P#9O8jakM`r>UMT4ub{N}3 z^Au*`5N{=G;5f_HUnj1KBK^TVB1_Q9-az;8!H7q+?5r&HbID&*&I_j+BR1y0@z^rl z&G-{;V=X;R$E7B5C9BjjecuLg32NHfeK$s=R=(-~9W?s2v<^|Gkb8(Y4Ujx_h>Gt( zJPY@|?Ps|6JxXg6VMe9~iIljegMA88p)-Qd}i$VzYKW zz&$f`#{(p}Q?Y7_Yp1=9fyizH5}V^W1TF^&%^h zJtpitzq@T7mJD<8S*-B1e!#?Hu<#Z7hQfA6a&=YH8T zk7>}+(I0_%lvZ5wzjK>Y>#MdX3kwF)%qGc7OyZlEU>Ywe`O5}qHRs_}kRuG-%b}KK zk0FW7lzY#O_Gik8J6H3!h1W;!F9+6#*1X&QkhQw+1>#A`75wzXoA_v#35{4^314QE zy}E<~9nN@Xgs;eJ4sZ>CimpKKXCW?qz?g{f_kuU2bp3C-cP!j3eKZLGoojy~y=@q4 zNzP}Se#F!I;TMJdz`A?RSg`cI*+(&@s_hR1&d?E7*Z4V^4HshRP>|{h_|$x4#(!Iu z;xc9%c&Cli&4X2ufvO!btt9E13ai%TnZi*a)~+_8fBDp>^T+-Y$V}=aQmwe!Xg17-`yf5`y1(*0 zA-1nIc|CpZvFQy$wDwIl&SCJ(mR`4Z)X9i~%|WY2RY=x`;4Pmr|J1$8xK4zPTAOVg z_SHKQwCzvaZjAs={Z_G}rskFKD_27#_nOI^>KtM`{~Lk5jT2!TaU^@w`w^X4EZwfV z(Zh~GrQuK5txi^u$3iIgN?L+*pRtt~DyRT9zV-lbd5Ch}!}-52-X`X+YJK zGv>qKAd=3(^UWaFV`v>XB@j=wF1EwKw9Ul7qa@!6jIG!cgW^I*S@u_+xciPmpG-op zj=1!PkF6WW3ckw`uqVJP_j<4qXOf=v4g>pxz}GHQ9cwsVOE;n=kL~ zXBF%HpWdkU5U{3V=p193?m5zf6oR>}SFFAF$WAQfVJGV3$tHQ}`tw;O0ws7Qyq81M z-M8*0&%{k0_$^+xE<7^0 zxX;E9aSYIay^TdQm$p;2x7IVNgkFDSeq}Tu_`-on-+kn9t2yu6#R0Dj&U&%oalz}l z2}Tj&@`f=Z7o>NvWQJxU_l|rNJrDc_nJhIn<)j#d@h-A2p8ov~Hp9D+58|HR+>53r z53KKPg!M&Ki?AvS6@)TcpCxOhGmpjb-2T^Lm4T06YJ&)Xo>SSF^ljv)fx{~2)a4mH zqeF!%D1Ol_)0BAozl40LexColX43nnH{9AlP3Dgp?$&A&UVVVO6Rj&Rolm~X&|h(H z&1iamsM=c1D0U1VV_)Z|9OgagpAM!fQ4|T4LrT>H)znsCym=TO8PqvMsP3)aoe(aK 
zV)M8JG7SEyZy>g;vAI@QE>il{_rJ)dBp+)5P53~wzdh`#qS91v!KfkxQbCxQVy$1y zhh~t>i-lu(c3iSjq_e)a9&lzcLzKw2Q5@5fq>%WcPNb9B(2=L_S@S~X&`&vnvQH0s zMSa_r#@`SC(z-ms{D}F45b(WumS_?%J6bSshmCECZOX!JFoWe^;drgMxs?{Ci@ze+8{E1v-R`cY2fJr#Y%5tW3lrNbCtE6wTez@iWGKTCHtSt9%O z)@u0{*N$|(d3JO(P!d?*8BhlZXu0EK$!mn-gstf=p(b3(y_mS$LQuJ&IHQDn1O&ff2$qM`!W|Cl|PSy%+1iStoKAPxe{qs1MKmx0q#i|WI{ z2pH;e4AGnVVWZj7;L@`+Usl|2af%MFA2(cV7MexIMv#LpMUw0{Z2248O(&4+2!?(2u?Y4fZ+xzv*R@Ae@BWvT&)+k99@_0KWn-1N$ z7rIY&l)T8HEA-YDubrG5;px_>b)#pxhyw#@xUlgx1tLC%GdpRPRIA`_q76-SM{{hDt zja1;@9~{f!1FzO?cJrk*Chg?^B$=L{fP4QPYp3EE0t9uG|aN z{aiEJ!P>pbtU7l*gcP0$VF$|m!LsKNP)hA-@W{J7QNO$3QrYX@c?cR0jU-^GASwWLlo}1)M6Ig)xeSB6G+H#?ddx7eM{>mPOR{rfP}S zSBq$-_d3T+X_q>7Ns4igUNcUtSzd9pmJL|*{>^D<>+ z+)X#~&_Lq8O_~zueY>uiZ0p*Vxi&0-sy&@v5*Lv_J`=6G0^*SE&*fb}r&$S7BYIbG z8C*h3Oi954mL4py@&$`^?Ek(|x*m%eQ5rC4hEIqX$8BJTEw)ym?@#I1`5N|P0^{vJ zf1h1FAP^PT>Z3e8Xzolwhr*mt-f~ghaYOQ`@mR@ciKO zorg<-e@+>_cjru60!DJBA^#KGu+sc{2k+l)t3(8)h7MRiCm^cVOd}qUn7U|lMQ|4M z?&1`(Edi{5KSaN3n!UM}Ya{a%k?NZpaBmAwW%)Z}PT;-6ET$dY%x3`YBd#pj;aD&$CiQOx*{JBDmqCT8$2$507W9A8I!v{?|b<>82&E!=Rn+_7l7 z(Rp!&H8qb@rHL4vov%%Q+UCCQJrHoK92eKri*Sj`jG(gFYp;$b&%R-A)c552 zjv~CE)MOKb$P21(DQYITzW7F{Mgo~HsI+{Ctba$P4s*{`Rd3?FBc~6gFC0H)amI@N zPJyG3dc-8TXI)aewEdzdk!?{w7XTt5;3$G4_uX;{3%Fnd|M`=;>M+)Z3cpUF{cKE^ zFfR`nh)R$8l|b<<=bg^$EQ=fnhc2|Ws4|pg8ZWvKK*sSLcYCU`?|%LKx;s5cB_WAq z8LB7<+%wv$9br4yY1z=|6($lJ z9=$(tkHFmU{#1yWEu2n!!;U3kykGy(H8+}FY3T*4#QmWO#!+>u81ObBQbg$BFzps> z&UuFcFdAymE1gm(y9Z}T?yV3NfZlQyb<_7U(9pXuBy3!C1WN33ue$!^u+Ks$>Vi*x z4UVjpo#A@E_6ZbhXQy(^(AD4;$z|6r2{;rmR&oYoohQ$pgC>jOIxkj*cc_wu&UpS)kdwjv0^IzZKaMHxh6l|vPp)yzh^-jW&D^rNaR|tfJnl;o{ zPEwqO_^ic5sOWnD4qf7z@{?e1yj+I)f+EiD_redhG<)4?6*S*%QY(QWHv zYF|=S(9O%DSt*wv9?GC(L`yYC3kDr4yN-*kX%ibtApD+!QWNHqUFWhmr91au7LVs| zL%KvT@Nv41JU*rz5m)Ljdw1QJpBI;V{PaE-g;?@^&&a2zoPx)%&~W(i0rmE|7JV=v z)As-yBd86mFyNu%tf58n9gI`!FNsi%yZ)=<%#Ec9WnwR0&hI6oqzr%g^5u4g4=xh>I`XyH$rL8{5&v8`q@B(2TP-e{NYyPGRCx3%TgkD4M=| zgy6}J%fnB2#KGKHfM`&jHPPqU-3PG-aWP>J!Uc%Dr-~t|aQ$rAn^{vtC{%ts9M*67 
z7mmMWTRvy=^++~d0k|q6%vzsT8YKNzol*5wY!91&zS{kb$@?8e2NzkVHKvm%sL2fPr_$Hj%Mx>6e z#Hz5$obJ^4t*QR|HG8t&+i$XlYxvi%UxL(Es%CTY{2*z(QHh<()!EtN?7kZH@7~SD ztq$GiT1rmUSg(5w)4yk@25*uA?EgP&jM%NQeHF0Ah)NR4iHK5;t$!motp(eumBkm| z;`g@`ZLIf)K)=N>w#7tbyWSG5#nLKciuo-%fm42j9Ob+GX} zf=TEd4pH_mVvpNtO}t{b2EUie4S(7I6g{d-HBSDGbP2MJLo|vyA+P=5*G3+g^NamV zbahE6q10QI!50&_q#F*yZ>Fl_8Km*hHow#qBZgG4HG25xA?GawVjR@BofE*t{MzWr zUzkty+2w{C-e)K`1jUeN<3{DITue)DM!V?|^JXO2n)vp@q`%W` zk&{#Dozt?Cl6F9K|1j<0vukfQC5$u%Kn^G3 z%LTsYyI7G$Qx;JrR|To{b8i)RbLZgiL}jvuc5HnkuNZt#ugG8NpElv=qlT1kj6r!s zz^}D5FwtYMsk@_mz+Msn#q^sqbvzKu5cit_7{LdYP7P?~3UUEKc&0_$2_>X8Vb-jY z`^0zMRb3fjSNKjWX^;{8^M>=3{R}O+sx|{DW*S~x4`ySzw5kTIw#H%ch%f?@5;kKp( zh6z8dh#?98<uHDLRM;fxh7Af~)3?ea^!4`;|sn7%^1i>Q( zU;DTODjVHikAE}cpV}^qUi$Fi!`#dQoUzdp6Z*&})p?!yMa%5p+o@2+u(EF*@&zIg ztQ}fL(hK+7{Mls<$V&A=$U81HK{mb3|1Z>{<_1ey{S9Z3*TjK!I z7BO?TU-u^1YzV<_(TQ68IyD`=1xB-e%qc8Oa9b2i7)CLp=Ma}T{tiA~U#_3PqZ{Z2o`SDc2 z)ONM+kv$TLY||Snudh!IIq?%t!i6thAbBXfCT}sbMDB+emLZbsnVp#&5|ShbOvhD6 zvX?692^j7slliya-?}{fS#DMBn$ZekV4$RP*7}p%NSCp$KxAj&zY^-tx>*{$XAvF0 zsIS}@alO$E5Y7dSTR;k@2*y-k$AMqfCv5Jcb(pi5g(XN^;p8b1^ZC=*hXtIr=@aOFyF1&hdYD{b11ays#=x*jZE(~TLsMfKFA`Z=Aooj0pJyFj=> z?POJo&)A^wBQa~tRU_%@HZdRCXqRlNs)AM1=mYt6(}27-T9Z_TH+-)1op}|Y-A4r7 zg{!D<9)VM6VW$&iBzMd2a{jY2aBT8FxY3eGGep`dOHZg1M3&W!gM&zZQ1aVXLn-~V z&^w2YcK!NwB!L8isoU0{z<+Ut!X98-=HP=;i?|pcw;`oZXs-K<9b13NblsasND?r0 za>e6IK(u=KXZ11oUxE1K*?0HL{{6hS`!P|ko%mG*zp#N-1C(0XrUqnnh$lq~nM6^LM2TBfjg z?hkHbh!$X;*>jKSpPfM!-hBT`{YXuq8t_#UP9bqh*2oL4s`{BLIBBwEO9k^xWA?(dew%khq!Fko=Q}2jC z5FulskyB9__5J&ON17IbJpfT_L$Z6AX}PyQEi@ULxA-l=U5V(6Ns`EL<-ARxZ3z51 zM?SPLjGVaKR$@Ss%VXMT-JJTw|3G$A*NOGik*@y>5RH#%>aDS!&|PnsY_%#7?){@?F8hLZa4R##U?#>4>aJ^2S(f|mC7_J|>6RnE$a z`E#X30<_CALXcHBXY`b8JH=%}I-J13K%~Y*HJ;AR^a4!3XlRgF6gYGau{2_<-fL|j zyuv_(rf?Jbzk*Jus4cTZ9ew?<{{DW`hd$rdVa?W6Nm}R4U z>w8($15X)uf^~Ko%pxYA$hOqCV=a&dL+&Nqz}LDRKq9Ujs^?d-)1`y?VoK-=#o)6R#!9=|WZ!6SFk z6?zGBq!2DX@NnQ7(u{8Ymk`wAY+-RHTHsuQ#0daH*Ne1c_=gKngLNiCNk;w`ru_XsBZHcwbMg~k7nKVhn>T3xB 
zk<;()(;@~6&_EP|CkJU~hl>Rm;cZjKy-6br<%QPA)&4&p)WReQy)i@(t`GseYzzej z1<>0b-1Qr7ai~;M_7;)+@E1IuQ~^Qj){Uxh;!uT$t-m}gtT#W&$gyWesh+}{S zI2O4p@t$Hpu4h04h zglFa4blJ@>FyPa=F7g+fW2aOyyY6>BSl?hAmoqaP8jprwzYQp5C$Vs%qN0%OE*Q`a zwpZ@`L*p{vNP)D*0JMbU8N+m?nI}UJ5)DXgvd(Fv`n&%(zA^Vgq^DW2%4!;nTvczC*|;Sb@2?7eu<%5JnJj zA6Y&i=sF*w`QVvjLoymVn(PINb+>Vwe=hLPxftZ49Ycp>zcdW+#6_irVLZiFB~du- zjw$)$A4bJB<4B+5#3@{knRgrA%0SG-j+t-~_T8*W{-vXHES#UZ%e%uDIv=twZmjZPE3*Xu1lO? z*YQSO)$UmcIqeFU8&Du~3Wp1L6b_y3iekeu09$m=y`T^ihBuvJ8>c$cX5cwsR2K!0 zU$(f1pu;d-{ZDBaH!3-;|337?TEF?%kT{3nQwC-CU((DtO?}?^QnTGcgw{Vi-218M zX2I>(`(GUL#L0=9kA@|oKI&>JuStmS(K<8l4n%kX_=SxIA$*mM5L~s}d~^#*^@G5Z zUWkr#-N^9~QOi|Q5|v0XYoEKrYDtBdnyIjQXQa=~ENc~+$pBC1G~OXK%;oADqkGXM z0hH3DgNB>nV5EN|iuJR_Wa0uw($qnUGeUSsd7iLF#A8&x42W5y&r^NR-R?2a@V~B4 z+Sn@|=>OB@miJr(ftBjNTEiyW4EzI=&njty%M%5F@Cz1B%4=mC99>f-^SQgKTDj8a*RMj*Mkd>}!d^HQ zg5gFheDmfCWJvK(al!Hbx%>{`;=1%aoSbqx#3T3rZn0lWA*JtMOiRjsr8kaPnFInx zy*YW&N^_wLw-=DC?+Gs z@LZ}Z9KJzK;@dL(+xTNFLxS&B9>ns2_n+lsXj$DC@Bdl*=U3BY)8P3T2h`$d9}_%CRM^f0?Z()A`%BobDbzb-0ue`T0WfB{AoXTzK|h# zht)QH+3&y&g`ccZGhyQeVJG9K9M3EyaN<;P(JknOa4OuZUoKW`cwZbx10|*W7jju&Tc;EC!==Ab z5I!_%GK+%VFrl|83|lv4Z|Aa>J{lMAyepi;T5%CjIzaFw{{2WL5ux)f8~=fUt$nv6 z@w2^U(=&FbrXa#?7lM>2~LpBB7H)6=Ca*-&u_sReS+*nVtJ5g}JaP-)xH`T03aDajlI zNq?tlr^+5}{!GY9LW_)wa)Zz>DPiF{^I0=(2=uwzD`56M$k58FB;eeu{-?pOALF+~ zQ)?NY;B*XS-Fe4<*YfI(USc-|~`DF{SxuFtwph8ZG^rR!GbJI}zzlJ$RhMd6ic9s)c!snt{J!6g4@yf=?Fy zcuQgrMv_z=?|vuNd+-Hgtt0fo-V687yv6AczDm{C*Pm=*ZTBY;3iSkE(mDTKHhU^~ z@oOaU?3~^@Q=m2IO>J#BAKjOkcTep;u|*iKjlI}6X?pit+C%!4|K3w-stsa2fxCFV zevNl#W2~$M&&SqQNts2dW|U**W*E>u)-MpAYKe+>>u*mfx-Ey@nZ4;G91C(W>VGow zPd8cXJ*r?p-);{k6k4metMl3VH=TAiYlY#sILoPm$b*}07ePwiuK#={bPb^Sr%(`o z<>U+#P3>#3QtjTFe26aE?+oQ^zSK|pjV!!WRVao{(pYBTM2%tUcHpGjuRZM0f?)?&K71Uux z55Io1J&8`2=ZVSAJ(iL>G}O^KFZQs}G&LsV?(xgZ&#b{~Hr2`hbE8+hlhf37Iysu{ zCZQktCYGFGHd?`>UM8h>`aS;Mmd6H6DJPmMh$3rNRsQQ1< zrM@U%D^o#pGo9Sz5RuUjKWVi5H*emU&k~c=c^0yDxKz?CHdt0MFMv|kM4{S53I1~< 
z#$Mlka`{`o^Lrf^A5Jwiw7|M6Ev%R?Q=--Q>R3M;h3^*dUDza=64z-)6*Lk~8$Y?i zQi|unh9=1~V%_{u8-yFqd+qo*A&WXvW27lj94|BVNSJ@B+7igd=se9E6l>5TsV7T@SU<-z1;g&!aD7TaIO?Rn>Z3rItp-%@Y->b9Jk#!j!OCXcYT921U?o${d2o zOmtXXAU=cw2psNDpFWl1lR_aWiI0KqUu^}6Fp*bl)=4kzLo*+GdE)%3Z4xrFP%vY% zNJunRL^LhsNcn!UYX%3moU(FU4^hRYfT-iWEkEkMa4g+s zUCTQ$C~RLi_+sx>P2W+sDHs3^eFeQg4X;TPGxB!2>v3Wz(qsGbw-f_TEYgITvqf`M zenjW?%Qen5KBz;di4JZ8Ge6T`jT@Pi?(vZkgpci1cDSUCTBbQoss{rYRzmy~IdB+S z;@^i&kp!-~_J}JaD}mmUW+S$b_!1enGLQ8?Ryu>(#Y7&{*drtc;uD7w%eJ|=6e88X z?^h-cjtPID(gPmD`}K{dF&^D@!@_V~d|FGHHgLZTcYcKO+qaH56!F@xcPIQFKSg;6z?Hx)YP#N@i4L>PfLWPM@2GP%NuAzPN7*{+qZv6 z$jX*EB%?nDb#N#)tR0S+bIcS}HRXjGRD^j>ZojV0T-q&;{(r9IN(pV{Q||qF>4T}S z1H!{3U>90?RDla|(LIZgq{**SWgO7CWl)U}Jw&~D{L^bhm~WGgS%U=xEu9sleUwvy z(eEh|l;>yOY(&VKB!zp=aB@Ez9)qnRt*!OsNI@1q z!#Jy|nLi_3zps0|x^rvs&O95tnJHiDEq^N*8?`Vg@$T{e@Xd5OrEHkw>N>{9HWaHj zxrFvM%m2sLTSis6{a?JKbe94WTR@~6=>{nY=?+1$$ zp8s#$aW5~<3*!uTJZpVp&d(g&!j;i74ofvaSfHec%fS#$2{LkfxXgzKPR!RG zi{zn!NaAH4KEv>3cj=8R5k!f?D*vqaT4y%_mWN)#NvD8jETm)G;Dt%R_6iq7 z&AsOg)GR6ms?5Xo{l{*Y(l{$Ovw6aql4aL!#Q-o&z>5sT%f^26cqIpmdG7o1hd-q7 z4C`sY)LuhN3nn1&01SgVz{nvEtbR;@xB?Wl?jmkMUrxJaWdDyydN%WEp`82NJG6TK>;J40iV8{Ak+z?6xJX6&v~I8gE)t zE?cv3jpcL5 zP`M`59*G&f;@)U;6ny=!={=2y6?Ym2>LV&1fmx+KW}J!C2<=pU@kx21hDLS(J32k1 z=Ci{WW5-|KZ-g_k84UyV{I%N%Qu_As@H2WcjxytLb55kCi>%_F#!o=S7?{BdZZyE3 zyI1(oFS&Xy{X;)B_8>_CE@J!6qppWh1pcy3v4yG1}wF6kddtsoc{^zH-MR4&o0P>lZO{f6*ITY~P|E#o+ z-_NRQzM8Q}yx;dZo*Kn*+P}%sf|t8f1@e=HVVxl0PZ@O2RlIs6))MyOIw{BW z{`jXq*G7cnA$0QawYPGWRwKFFfI*E=5G(UrhDc?-1`R1R4_~ z{i%H~I&3isW#g!D(v(6W2MasCERIFh_T&XOn5lb>BK!pr!iDWi%@9;S z;`=Bz>|_rNbe){G?nqoF`QJ8$+8)mJ?s{!#s9H+_Q`53&-CgPqVR=XF7-m@c@H>?2 zWc2{BN}U^@rp}by`Ev?;OSZX{5dJtx-e`#4b8aboXu(V zg9hlpYM$U>AzZ8_s2qNMoNe;+qdXj}?2A{y3!25fdG;YRbOBTES z-ue7k~Han+JLEmK+o{>bQO;e$o^B&}$~7khA>F%@;!A>478b zU$WO$Q>)eO(zp*kh2nH7Ptc~;oQDEREe0hI=f zj*t+nWKh#^5!sU_Ycj4{nzS|D<>>|J3T*_1FZIz^BRYFK}c;i zZmiFu$&Y{a{}067yI$J^TXlb+vsnS3y)@(jYw~o=5PnSvmcim+*Tm^}>r-G4Ia;Ba 
z3&6TU<94u@zyR{bz1e!Hl5w-I@94)vaHPksr*_}D_63rEAtO%PW#numw>$(CiHs-= z19#4g)wWif2QR5?f|ye<8VVErx)$m@b3w?HKq zNV^ulwb}#gq%&h_|7YT5;I?ZJ^9ctoq&T%&6XlMBDK(dw^(s(>!#o#{GE;S5-E$C z`GTmi;NajfZ!sWS0pOkN7uN$izP_#cSD3&~3Dl&VVDJdVfir8BiUSiOlzalM$Hm3P z{YUw@(y$l(?nI!GIg3sul!zN*RM7`jmJn=r6C~?ow(-Mk;G|7!8cI%O6@x5Bf?`Mr zbp}pMFsJuS5|2uZ#uw8pALio2&g z$zJ-`)_Y`Fut89Gc;pni-zfA^OkMd8ps{Q9{Z7u5qOeD~ysQJHqseo?AE2yw<{mi+ zV4tYJ+sgtBbMU9$=5xwiG9AtNedORWqCGge!q>>#A(sr9#ZvPzvXOO3$p`tDW2r@+MUHdJ%?jisk znSh%4r3Ht87dxTOMj@CP3j;Hzyxp?WPDj7NG~oUY>Nx1N>g%)?LXg^WS4#;p3lk;Wo^wBy0f?-vz0UZ}S- zkI~y+kymXiJz*+~CBc&qu^gIBiyN_j!^u_k8heh=uq@7z;#%OC&nN4~)tzNnFK|*l zc{y)_n}%qck7G75kqgyQI=WqGtHdOkO{KnC*pJL?oHktD`(SAwklvnZzQq+{%QhzD zF@+|+z@0%gRBAEu!~#OkR}a{QAHA6Hx}wzQT=-H+njRI`Xi)yP=+LVBnJ}>^woMvg z!HF{5Qi@NBug^B|XzvYpLH#&4x+-f5AVRjfy1KZnm507-xhu>WTo_t?!JR746T8ivo7z7u0;FE*buh%%)` znIWZhX<(!zqgk!}U~KmLP*G$}%+>6!hywJq9mhmH{Y*9E4GxJFNat=C?JfL&Kzj(c zu(}VJFFkXaf1VmIJ*9?6$amk_4|f3}8{ArzaR#MiegN|2QRYb%sPbxMu=V8F_5GgZ zQvu|KMeH7;x3&XuAg9aPv;So-{bO>@7ofBSb{ELuTR-oiZ-}P|*b#K{j5XoB zq(q?g>u2W8>4xPD>BX8Q%ok}^@gHdqvZ=&;c>Czt&KK8?{B;EZGy?KK0|rRA-cS%F z_S;g!3@GzIlD;>ol|LM3#NR9Ee)#bE2fHj<ZtcdkXY{V7u(aR}O3EX&B5(0ek&K-$C0Eb*+-Z){#0y1U7Izzoqi(KX3 z*$CePEjiF!I_sP82Q%Khf685q1;E=y7mN#v#xayoaSHMa4%K&Go}Ma~#^xvK{DY=X z1%1H1zMaF~L#Kjap4?UH{$sV+6Jblj69k>yGECl z4N3?|kK{gR{c5Ne%A62q`vCERz#bqS&QHjp_bY&?=D z1jlt32)}~Ewmle8Had;KY6GA((}nAkt^8!^T?Xud4XFNm&>hfH9C}=0QZlwJxxDr_ zh;urJ4?p)SeY34t7rxLVVUqHyWAV9fYe3q0Q^8Om>B|AP znNFh%ePm?h4}emK%0(bF2MAFh>V$rBXFzsMCvd!0Gy!JZk1aRA@eEYz4Iqa(niA0` zre-veYLJkHGjdJeGIHUo+12KC+l6iBgA$TU!=1_LJ#2Y9FsS(Me6)Y+USkLl1#_HF zJllO<(%x+Nr=P{$t{_v}v8dYzBUwQ9%qdGW`n-QJ8SfYNKjV!}h1l3V?G!2D4@--O*@@M5G2LuwabBx|i zfN!hkrUnUNxP#w3FpRH_!U4dD!&{Z{QhI0GhXn+|-Fzj~*&{6Vz*x*=?C4;51YEh| zsae!*YxBf`ln_PYJi`}H=JVia=FqS~5XjQp%#AxS@z~$lAH{@#(cWL>5yay=(Ewz@ z;dD5SQjwtb>udmU3663#(XCj$W`YIk#Q#XN%MA{Zi&V99$zaM+Z20x12LWy19rTO> zOG%RfRS{-)_I0RvWx0rCUC#4C@Q)R4VZ#D`p)k1F2Tba?h(J7{O?*TIs9D5)_}*9V 
z;lGa|%*KF{AV;t!itzVGZL_m!a0!r`)*_0&SLnWY!kN=Tec89YEzEupBmIu}k;c<0vm!6D}#AODM!T3}vh1z7_E zIzb29waC4K#^x7MCBH7H#(U8K&Zg0lEciX;5Z8)9z%;D~z|26PrilsB`)@aqQHS5I z^>k_?UcBmj0~g+E`~Rktz5xox=%`P^7tC%DVb0Cv?2Nzo5y&$Alxu#`xXA&!m?{&0 z$vAIGG`{q}Sc*x)C{?;ZGN`lU(=(3QmiqQ~l{e$H=;fGL z7~oxD~~8k&cRu$AQTO*@5qP{&v3wvP z70flHm|yVPv=)8x@ky^o271YqL9{E+ZlM19F>oQYIEMRDYo$IBGfc-{HWom63`s-N zq#^K3SKuxyhrCw7Is%}%@C%cPP^oRxAc%)VxexIXuwQ4yRNZ=Sg>J4|YB3=;S zF9+8xchqx3EXprr25NSycGW`PL=IR{5N^q*>zYUnM#gExS;GQR?~Qf-DO`t}nlSd( zo%~ZIrfpmh+cwH{15<;J_h)Q&91NKP)tK3QPqT{$<{}IPV~1)k;$WJuYX6QhS#Oa^@$6e+;d0t3a~IOPUNIaAqF6bin-Y z`fYV)m(XY-W0i+jweoqaN}mjEr?zg`LFi`h@39vu%fPXBU(k&aI{l$^LuY4a)gqn# zzY#)X8C5g^bCy6`@|m7&R=wOf+9|(`jQQ}gq7lB69L|u|*}A3KNT*Qb)zaknB*V(1V7Vcki1GPp_bPo83fcMq<# zvi$$i0z?w0m`hdS^s5H|2eR@4=C2%>|n*P6zv-apoAtff+MmwA42upZK z&oI(+60sohBuI)t!oSzuv?D(d*|=^u^!m3v*|HV5hN%O#z&R4MtbciFodI6lNF*#(qi0#=1_RF%S~T-&G#N%qjk` z6bVBr*heMTbBB9?iG5Y#^Sg2cT=8 zp}QSePu};V&)T65TvTVST=q#DebLnCq6Eb39UyOVE{i;{um~G#GaAC4?lkX>SMk&! zI)3tvYb>L((#m#x%=D86toUo&*)$DbZ4TLhY!d0JZQHcEU)CC5HdpaWaAiS0^mDCG z+I{PR*m=m_OUdo!lW!uk$R}r9Eug*=RmChGlSmNEqd`0!;Hh8G*@{Hkm18-0|Dl5( z?Al40CU+$eZ{SytX!~7|w8okkN%a21h*;qYBEuH|!uCJ>hAhBuxC~~UDu4-S1-+$K z8jcj1f&aAcjk}0U0WqZziGhe=r)DO0ZN^FFX6OX7LYU)zWY;@$nTumpf+4;>q(Ed{ zS*06Vh{)0%O%?E?!TEXoAPP3X8J91U`vCmXE1XzlOAj^4hAisgwalytswl`RoXb@v z__=D6!{N7mkdPFJd##yyOCy0OP#cJOkrylZgz<+S1#VN_wpZVH#yX5Hqi6_4b)4-w zt1{q1dnS$}LJMdio&2Uuz}GW<_ljl`gqKw9<(h{ffwpI~2~Cog$O&+ve|@?M=I&KW znU!Ni=HRwOa=_ZvDv?Z=d91ACJ8_gzo``r58!3JM%zm$p;O|!mxZV-ug!@@bh6RUX z49JuXqnGmDX?n7vy9mex4&{^t=yf0=!(2WBB13~5zHHr5PgB!z<}Z}`h<5F;s6$uv z2S$|lBPEU|9k*~a+)XcB4`@J%??g`Kv*v%9yqLs`qzVOuNjC04W_#Y9(_n^oc#0aoyDt+D1tRE zll`){o8CRd1k!7wAl>~0O%?fS6UD!-_Dk7<0gi$DRbsT^j0KXI(sQ4CI_2^eU__^ z7mGdLI3h_?W@E4$Fu2Vb0NBr9$DB>bC>nh5xSWXX6kT1MbO9S~xigo;ez!qD{zlms zM-Ae&Qu*x3fiw^S;bN;G`@fK5;Ey|U`t;vc373>EUO!rE>yml=9q71uzRREl=%tzO z#@$`ouBrCxn21Yf@$z(_@dL6&n^XwYSntN-DVgglpaO*7O4Wiy>I^*sCO>ALj|SJ= 
zBrgJK6Uu9x4S^cvZa~+$pSIbxW!Aqci!%35yyR^2!+)Y*Z_|E1x@9-DDVv$|j-){k z9|>Lxyi~lEr$cl6>!EEe;=A+9OzxC9SChirQRv%z8-mt{$pUd zAX#Dp`yNYpc=;#I0TAqoA8ox81yG8sPO+;ARkG>9alc7T<{P=1g`X17!{*Y34BTT@ zLO_cH?ip zPtDt@C>YigNa()@+v2po#u_Mc!ZZ-6`rp|M8b9jWGqi{A^&4GB8v6G%R!(S__W;nC zDS-GeqQ1_IJpm8xZKKHWliSXAe1MzRE_g#&_Ywk}X_e`PfQ{f1Al*Y0zn5Hnup!Xm zu62to{SgG-O?*5RtJE)wM)otv7X z|9I;6(TK5jX+$#(Zm!Q38RJ3b?WCd?f!da@g@B--1VxJlqlRIs*%e=jMrfc#6lcGwco51BJ*ta--`u?lgq>YEKx3d8MgD@&0&|w5%`eK1ROwW*Ib=2 z3A0Lyc%qFM>i2=s{06TYu*!sPQ2?~388^+A#-Y`E7zotc&f#fv%ph$PJJ0G!RjZEd z(`k@Vd6DyL&@O$R0U4r3g5Jz(ghvUc?7i7=EXCu`UH3Lt-tJ#&>C7AnG*gM8N+@It zQrjBK(i<%#B4241EO^h9aJI;qWS%RI{mc~cCIt|Bu*_)ta1M;%w4x|=(K9k5JsMs-KAI|40`QPY zn?*b~Eb%_ox)*aViH^Fy5<q3V<*Tp21Q0+&qxBHG zM^j~x85NaqEGJwIt>%6Q7Kl^Z1#Gsr)lu-Jp|A_NZBNGPB%w0!9`xy#?e}&5R>{o+ zT$DcH4EydAu~%JQnk`)q%QRCb8h_=I!oG;+t79a5b(=o}g0Q8XomrIsJ|i`$rA$qi zC!IWsJW&R$-N7xyU+a=@;07esWZ~H_kJBtqAQr4$SfU~JW+fPt| z8=|Kwj{6{xq}8e<#y?JSXKSn@VBvFh=wBjNg$#V3lO zqQ;UGDZml}WG`2WYLm|bS+5Q@m%_x{M<=}Rq*)Dtp49npNg^>RX}saNkr7oar9dEX z^za*+{XKm(*a1Acetxuvx=Y`%>5ZkbZ0*n0h4kx+HWPes^4-pv8a zA;ll$ewzU&Ogm+~7KBx!&=z~8klBzi@iKeR{!p1IS!+>KmHV#&o}ziGuAd@)QjtcRrg znr>|i8DVFAvi@@os5E;s@Xj#RORq)cit0DY%zK7(EIb_^abxtLz>eCjW7Nhh$P*Y zNSL{`WLQkuQzVE@{x~0>p%IX51>@$_2M3=n@J)*@Qa7XUH__TBDEHqqv9Ym#=RVne zkTz}S@jU|q!2`{gi+JdN?ul~*T9~oAHaRIO%Cl#ag zA4Wribt^mYW)*XqE@XdAJ@O`8aUJ-)7N|$tae*exQJ28)lTnuPQ-1t@)wR{ViOgYH z)1#~vY0bi{0lbQ^xXn-XPan?ocT5l8aT3>0G(8t-S3gxeyYhW;ilFaArWI zNmVmm4`ZhgiyJ0amMHw$$5!zmy{xbHVMPCkZfn7BOzqXO*4teK*_V%pMb0UKWkV7QVgti%ephDTH%E`QQy){L5gM} zeIAuQqoLIF8^@^mkoo6o7YiCHw?+rqp-oGG&Y~ILmqy;Y*&T-U2ORctFfJ@*(xdOR z&*A5^e*GT-%&eNMNFx#iF06>%{lOz7B02(ICj^A&*`hJPqz+?a!yOjAVgY`uA$s+{ zSCGwH??p9+YKhA>t+iFX#z$PO2>NUrFuoU2l6mObN!T3@F4&I0LfL4)`l&PKxVF@geylv(-PfNv$z zUDs+mFIcZ5kfYehRgzjp!9rRO*8INmA_#eE z{yarOCZDfKKDix%`ka|s3Hi7zTU=%U-Mi+a>?9Q(?pur~06i+moY&Kp{W`&-RR zbYHinPEXt)nCJ**8fJN4C2~S18nqaUZB9jl^m8p1AKNnp*EK=V6dpt~WTid4ucG*h z>^+CO9Bjpv6|3d_71PmHyZF;-8&4@|kJX)>C{SoT6~y^lQB@<8(t%ySCbtCsp_7!h 
z8;<7c&guglPg=BJX6FKT4OwwP&ZrkEa_rF#%SJYR44`nTuMn}jz zLB9m~(?8}CQ;f~NuF9ZWpD^_}jk_qC#%838y+ikEr0KAnvuiaBr{O%lbWU`AuhUFc zkFkNZ5^45shyfqdfvkp+j%!T&PhLwL(g$PuJ{yxH#5lN zU(J)+n)0Q`JMp^A&Cf0#6A;P#2>+D1AyO|uYV3cC^oYt~6H{PPtbmDYsOeGchGE}C z^r}qNU`{LHzN=U-bn3C^5)=R5>sTSF6n zH;s;j63o~_@aZ(5Nj>xlfsuzF9y3QO=gB#pfqcD>7_MuI`E)!Dyig}vTtgFzz=mik zRtwDnK>HOA7Irkk!gC|me~Sn;dNB3^7PgM}myAa@;rMo~iBT4E-0=nVb|iiRhG7MK zJ?1a5_?33na3rm{J#d;KAS^5_ZOTDWCAde^`urN1yvBkS z;gl`@Q*CI<94d|`qSHG@nk4j8;$;lH$5*6io6(ld)yx=bPxPYT{!!S2VJIDaX?MS* zHLZcv24dkj0S_!B{D5p9kd{7z=&>2tFCLyYdv^SVjS-A1?aF{dR0H`L3gRRE2JyUS8vh6vvbg){pSd?T6lJ9O7OK^r&{ebQ7vGkBh1>r9-)$i;O zysd-00YFLC;CmwooglI?!T|jY=@H{PgSJQ?szw1US41)NoeT z>AE^wZ!a$2nv9*-Iy}hMwdzd2`?w@FTQTHc20UVR<-e~I1f<={`A-K}? z*1N>vuX~cM`njz+A79GdACg70kclNNQ0`KkZ0j4_SzP(VHf}SgtC}mf{8XAzzeDkt zI`P5%-DoP)?;hWJUXmX5W)V|HGgpQ;mC)&k(xK<<8?*)$3i4Y0Kmhr&_sR_#16TXv zRw2?$N(vdE%11&D{@dHzR4$9iyivM;Pm5eGvwvU37+XI^TQp|R>GuWIfzs@5(KMZK z42qRXa5vY6R=Y#AK#r1hyiSo)$MW@^Z(t5V+A5S3QY!f(|K#yyyG*dQ=jT-GAKWFKP@nN@gj%%&Ta#wCEk@l9wbp_}sr~SfqNm|WkTev^nId7TM z)uX|>AeI{WhM$+CMjvS!B6_ge@pLA&k{)HhI5&0t{Th567M~;VYmdIP`36nF|AD%?kpfc*Q;KXqrfvsAyvBp7sgL3R9Xt zeGt;fkhVqrBrJDe8f271i+tc~?HVcitH@Fl_~%e448?LwT3rnYxUdqZLAv#H-Cra; ziB7F!#;Cb_Ch3xuGxmor&_8S0W@u+QOzU=nf6h&oAf0{PME>PH!a)vdHQmB|CZDM# zc}A2$*Nd}s+xWskH&rc-hio;c& z7-ZSgv|jBC-H(wK{D;#EKJvT?)o=sYjrI&#(EQ|i&O zneu)S)PI2_Jh_0iN`2^Gn)o8eY{rimgzfuSS8USuz8c3@N&e1AX7C@~=}ct1tRl+4(>^^(@$1Nj*M zG0F>wf-#__l_HkuUw6TNz=Z-V;@yBMm!n?gjq&f7PR+g(LiXnBb<4ALoPHO*Ki7?O z@bFpB7F}6S!vy2XN#27gUtZ{Lg24_KRQ`Zb79ZkstCh0u&&-WaT}s-`=D-u|=_bmB zshp~T%8y#K|6p~Lr0=BM)N^7wS0|BybdbN$!orK?fWRAK6@=gEa}@*4Na|B( zSZ@pk8aq2X+N;0pDrpi>-9oRTFrWa*Sd1__bMm-y?b*HR+T?kh+?rFkgDp}hpL36* zp^t*S`#xQYi5)XuEtX5?<%z(v@%vG|;QJUx-k|Z-S#=(yaV?-zM@7AdckggVu9T?X zN}x@KZf-4f22872Jp9@ci05E0Tg%t;3tsq>qDD&gLy9*RdgpvOGe+Y*l%}sG!fnba zE{9uh-AXQOTW&z*GRfrk2F5f!k|#G(s>=-lg23k>4Y&&ma}#!aQm|#r#J@#)9im^* zV;HPBABf`P860%P*svd^H+4(|V$N3lG98-HvHOyUTw^uQ)zTnqq|iJwPj*AmD9O+$ 
zV!n8}ZPTds%ss4R41<~v{?jHRbkb~sMkaeN^<$y>iu;4q*XF234ZrKYnV~$6Kw?M5 z>*ytQ9huF`m*MOT(O?x7Obo>#b1~sbxfRt^YbswiJ-X#7x!Bzog?}v4PikQ!Nc#3Q z;juH2TY?MoL$_Hl@ST|h!8oW(ix$@YyGlBzXhBs_nCz-8&)JqjO=Z0u+5_$iUMWY; ztUT`3`5WsgkM-YNxXR5*=NT%XvM#J*h9PNOa0^l+lP6t|RWe4B@{>jk6o&{B*GPsg z-{_}DnoTp{{OY|9K6tMbqSWB2jaU2tQ%SJ&*)4_G$K4Pko|!qzaBc4)Y${B#kfw2+ z;@X5)@gYS zPBN$*I0OcRRp2_m-i4lb@S`yZ;S$3)MMnmVL#Sm#&ALJ;ecXkS$%EF*>Rq4c3ICa8 zjkaBn-4KLJl{I+geMHa}$g*11U1=C$F`FL2uEkN!dDTF!j^T)_`X~B)FJSfcWYyy< z1GQZ@U+h+;Yh?C&cpjizAXg)}#9=efYDazXSHpx=Z{&-# zALQR;s!~}*a^KZ$5|n?|FPC$$K3r^jXUe1%!;zo)n4ymKk4S7d#^ZD9D3JSkv=QSI<(zdaWPBcSf8p1~7W+3=bl`9sR{%okg*S`{ifCxGW3q3>^#U+X?i6|z zg%a?y%^A><)?d^u*x8Hi0Nygx~yum2siy=?TPj}Y38NB)E>jOcX-&ad4+ z_Ya$Q;4^wQZZ=Uay&0W+Am5eVxD2k@<|26wDG1C1Ns1tzCx9Z9>eu7S<`Hy-;&cHj z2sCX*5C;=x`21fB!MNn#56d0Hz4tw54!pdDj97h#{pisu7Ka09&qdO7%OwMuw*>Eh zOP)=aA-L;aTMY`bZg^K-;c#>!x zTuMb8!Pg|OJ-j0J)d#6cqELRGpd40`CcO z)O^{q+UNCgvLynYx_vAfb2EpK8_H!vJI%SoXKjiwZ$R2?X%)VmQ9Ky0#$% z!PjztvM|!=s8!sDdpLvMNfbVG+HEC(OD zcq4)Xb9p~RD8RrqRK!n(=z?D_LvYQO6MkVZuWl4oO||D7#_?*ds3I4a!3K}A`ZgEbL@)pvn5XSyS? 
zJkNj3X=%^mE0SVg$0Fs-RpfsgV{oM4NzHRu>rhlWl76J9!PhGKHsUn!XB1_j!%2M6 zk#Eprnwv>E4FcFL#R)eT_UrShgx&lfrF~=(O^2Hy=ZD%JiQnw+y_Pkx9efREIHYBy zKj!+z90{Aq7?f~n?En*!9~f_TN|-bG5k6W+gV%#t)Pq?J-5r&FALf&TXkb@>0L%DE zIsKlaqa&p-93Kb_kf4?agtgesH4xkj$~l$)@1E*4|EF1axRx7l0#3N=r(DE%7rJ6; zrJUgx7fylkQ)t)BPVm;;eRqmL zF@;UW%nbI~ZaNT@5&&BSB{M?+I};O{EWg{=0D1aUQ1icS6pAl5D2_D3c+b>Unh0o9 z7e~n)6lOzERO}=BK>PN#gnK%5nGMjTS(RcMMSMmC#VvRpI3lLJ1jtfG-8}}g_f-;Z zu5!clxubMmE@=8>U)Kk5eQ7gfeb&vb-PXRlN&VKY|1c_(4N=u(!7&{Y^MlJ4B;R9!+OAff*sw_-Ej+}X(^LnCR!G6@>kdQuy8E`vC6ZXiSEVE> z=j;(Q9YSXe?b`d61a~lJvgg0%om92Xv+eTk9rWIWI;V*CM)>U9Rj5DXSg1$)T=zbX zvvr<^WZ!%wm`nk!imM|8a?xOHsSD>|F0T0uzw{xu3|{v)h=qgk2z zenu{2&Bk{a&z#PEIHkJhQnTZ&`#nu8>$2r}%nfH~v4z_1+m7OPVDG^4o|nf|eePjS zCF3J>;M%}!OEzFy+VaRcywEmnU+Q(R+P9sT$#1Te%+jo>4AV^%blnhAlDAM)~>Iwoz@3PfdA^p$QJmlpdYmd|8$4pEYc`~%PDF( z@TwYq4X&M)fEV(5N>lh@7ILFGyr~BQDGF>Ig{1rNq8hy3+Ef367hhGF$&$Iq#zpoF zDOSWJkXg;FLk++@k9P<*oD%u;t>?oy!^``3WaktvN~l_^5|}DJb0&nX*4IenLZ~zA zyi)Td@ceW<^4Cwmw>$)mqL5jnpo7i~4D+A9)v+QtcTQf3%IQFNdaHfO8(#R!YO?&H z=*{U1K#(&5q*y4%i8Ax=Jxq{9KurNXV$9&wdpzs4vK-0=9T>mo$8@68huZO8Raaek zi5l(gM;V>%zX__!DnL3DR-_Wk&h4q#sNpOvq7b|%R279p@>+07B4em2puf39rYfj0 z^K?s3 z&+K`k;BQp8aC>w~HQ zNv3Cq4mS~p$&Gs5DGa;aY1jABz%thAo0m`01sXW+DnMg!n`K|sI4<0b&+|bcl3+hx z0r1t5uFkqxB6VunN&MvWN#z?E>2&zIf4gx1twp7!*Pi}U;N+^G6&Lh=sWMHSn;`nU z|9HQts3XKXwH%7JDqSM$jeIdFjCeuQYRg3eIwit@Nbg#X zCt=4%ck8rZ-XWYC`w*IUJm*4GyoZ`w6jwc9^na-&;Y^|bJeV2`OpPd`g5Kt5n;c-! 
ztQ%t+152j*)*ig*u&hz#KCcLigkhdqmNbB`*;SH(j|bgDXMszFjpud7uj(1#Y=@>% zI@Pokj&s)}x&56UadWeOSwA_RF6~2_ z7Q0xnK7F$&P3+j#X4jBvPN1!T>cho>Uxu8q3+B)N{k)|9_q}u}Iw)$_jza;PBi};} z`LbT~>QlI{J`(9|LSU(6Ld=7GLKH-m+Oux%y_O|Y^YUN=WKp4avENc$K5*-iv8m1< z`^iWTg8s><>zmRBJh^w@VdL{YEmEbx{%W1vKz?8TOq{W33g^z293 zu`SO9LzkJ8>1X}CCg*3uKAuoLa%EVvm_5*`@~Vc_j0@w?_YT1g-Q)A@&fm!qpi#d0 z_jDLg#z(<+SP8|63o+Hes`uml*=E}}E)3?ZQ88q$CT#20TgkpmLwj3MV1R_)!$8zC zt0?K?#DSCYv-Y5$>v5v9AcfO~rel#AO&q^ID^s z2D;OVqosuE%wCgR^V<;%zqD!Es1(yOA9=8DR!mwlY%y!GZ=73wcov})l`4%uDsta{ z-}JW_>EGKG2CJt2XE&i1()c7lb#fGsnHooen_{PaV|z%)hX!ZZF19bpZC+$U{VN?9 z)IDT(Kaz+qnX?hD`4Q~+{N+jkw{HLFeA5ZieA_kgKD_=`Ix1C-$MpHBNti zAa-TqJ^x6{cf??#YoUI;|>&WgTCFbdf0p|1h10E`7ziY+ZN<7AuRIT;)yyuebAv$ z!8H8pCWNwkgPdiB^+tma>)pVrQ^q~4+vlN0I=r3Sd%{Q3`ojv}l>a*yq{pNFJ-z4! z!=8epOI3KW0}GQP1o`a|*zw-5`gp^LzIfLPg6<|$Duc74fE~2uB;V%y2fw%?jkDfj z;;|HqEx;`Cm7#U^v9cbTg@xZp$dilVGe?e-UQ94AEA2JRf1XNl$v9C5N6nlk*H(Ov zW`^Y?oYM~bRgi3IW*eu#is#H#h41R|Sl?YN?Z*uAuzTSQ4C{Xnme{`!R+8EP9sE-Z z*+eXc)THXG66Y5AZvMyIoW3`#(6^DZ%dlMGNtV@41em>;>eRfOj2mX-kr=F_+>XxL zXXTh~tB~1EWxAgcALGE3%Ar!#vPfr60w* zZp~ffMHp*}E2|kOqY7qTcghI17;P7y-r^aD$Q4)h(Qbo7)nsS8dosigl3OlruMV7#*W{r#6->LkE{;|SNJv7!azcL(01Jm{1qI1L zCD#ea0T8%cT2|H!03A8VW1w01@$vEQnPt zkDg|$|7JrI&?EY6Sam(^O*LtiD+ZJ-)9^@eO)BP`#4p#il#zG|}=UUEHX zy*^ef9N*5ZB6uXb1VPWeyaWOI$uzDD3a()1JFmrZr0>Q{x7*C!T8Fqkr=_w67V8WCLA!U3_|92c~l#C4o21|wE(t;FW911@08^Plj;Cq6I0A`%b z?&U%@oO7(2y&no zV}Acnkp!v|#j@*92Y~pB;17no)0AyQagmWs?rbr#$Own0r?mq6dwcdr?@8vvz)_;` z3;e?=-drE;RhsY|YUsqgvQ~!MIx=M~Ci_#QP4GJ$fP3pU@luJKK$(v7r$NJ)saNGYI%bV&&cNOuT`bg77f z0t$$vASI1-hlqrPbVwtDDB(93pq}@=e!uUpZ(q-K&UJV;?7h~y?|IKT<``o(;G9Ul zAJOO}`rya(`AL1#M}dWfF4v!j-yW=~ix_+_o5(w6Ju07Czx-fQcW_Dz@|{J0LghK= zj6?5e{gR?RW|_V_+nET%Bs@U#rYzshej|#iCf60hhlo>Ajza1Q3WH{Z3-CwG$j>-7DuLAITrk0_gTtS=}S#3tvbVmw~H)LoG=e9gdk>cdNOe|nP%G0>Ut%NL3-R%|)jVf<3z5m(T5XIk`&hy&idvJzkRAOWaN~u;~Ox zwZ^a3K7M_%)I>C#&zUd0wtx-e$%`|!aVlvmc$ou>>2|@Bc`H0-j=_Z7U86=TSNT*KSryJHXWnssoLE&1bW& 
z%S+r%It{YKQ`!ZyjMlV=YvJoWVdbA;nCdrS6+Bqw4p385GjTRd>qVgqjf^^k24ScS z2`TA4pc7yaNvxwrM@PYx2gON_U-hm1ED8lqU=jP2e8!Q0e6k7)Q?M#V!!5;u$5cg4BG$&nq~mz)giZIeh(oXEVps{Q}xZJ*p6`j3N{Mu+!aSi+Qyj zM!x*H33M|bD@Al0B*_;TEGw5t`|Xs%=oy7PNV`eX<$csg?L&E0vOW%811c4;5Y~5Q zPXp#YbC3Kw`Y%)E4o~XEu@nUg)<%o31si6((Cx2#<}CW7DzCl%7Ujn9c-sY^j&DVk z8A{=N!Pw&>K!pGEvUsEIb{PhxW1zsed<4QC5fKqzpAWFc>f+AoyamUhL@`%PkU1$S8U+?FK8j?*n-2>vmrAAW7+E;fvi zVh10^cc)D*lj&4_YN~AX^hvzwkrGG+yp$h?i<6hKDX&(?Jd1qq4vT^aFZFI<4-*X%;m|kTfr6V@OvAt|&{a?9f z7P<3t6uz|MQ?NqyMs)JE>3oSqL|Ktx?V~!GTZ;|#Mt{@9U86UKg(+Z0?BMR+5=AHC zFxN?wvjJHE3dP_#^H8b!8qB4ET@eE2c&YHVo5PCa@`%M4YJIt=M|*J#7JBP5CR`wF zP%O0snYs|T4^>>Pn6_qU&W&p5zICcIb;WJ-9uw0J!<(tJqVGGa7VJ`DiNW#Q)k%dM ziV0+5=vNcbdCcCYD^C4T8Nte%B>~D79DxnROXI`b3Tn!a<0%9P-xpQa`E|?3%Z>bArH!S0(PZD! z8o_7+KYBhwxVajNKwyvj7+X&Zmx_LVXh2I}u4O{9>Gou@?R+wY{>PchSiPOoTq7*( zv8c5lL>G6N=G$K>$ytaXR|G!!2z47~*?|7cwEgj!%a<>6Zqh?T4&m{@M3We33(Kac z)%N#O042Kl(P?E9e(LOX+~bX7UGr@B&8;)W-Ydd;TZ1<)`0Vi0ir7WM6iJrz=P6+@ zNDxehKvLrZU&xUWO5LkhY&qqK?-Dq_r$TpjeQT=;oaK>;Iv{Mxe}6y=d1c8k>@P{= z)ap%<#kn681ml>Ak>LrDxnL7g`}4iuD{PNO=FP5Jf6b_GSCmES0v|^SGTQ3duIaOS z>_44z1u4qCsXCK z^ZcDByvczGrsa9r%~8-GCaOt{rZpWMIRz>R2bCSAle+2wLf|nYMca9ggYiL9#VE@# zEOc2uryrTu5gHjO2d%VVn7;B{#2zVNkun1+_+GFJ*O>narRlmKOjCq36DB*zLtTp0 za*)1H!juO@3I(2hL>I)1s9|0W@>+bY|Ct#L1tUPfCy=oU#(+4uxitfun}m)o0=P8O zU@vy_*VIHbgf4m*XHg6FUV&)?Hh2%2kCr)JxVz^O*1?Mq2Vuy9f|HW~LVW~EM;e&B zAQj^VkXmHZ&FJ)j__W9phf6)ujzQNPYk?@?CK?qgh=Z!-CK z$hSjiL-q-0znt_Vlos~R(k5q}Sya(=6(dk6nibyAJsacAdt2iX5Qx@2ErPZ7x}@$d z4A%Y{W@B-B4BwVnK!DV!(yggKN2kDTMW0^MiyT_c=ivC_2a|Pyt~Bi&#HMYu%9AYP zT2ehMzVP?I59r_y%j)VItcGXhao#njbes#;Uufp+R3+C?_t#g~dk$dl{gj*4GU!ei{?2dd4tqq3%8&nOeTFWMgBa=J^%-ZPIonj=J`uLZR7 zDh%9P>*Ctoz4d10upcl zwRW#u(zkb1=eBU3@Uw!+ODCbNanVU?FeLBlc`N!Nn{$`kOD%WaG}(6I(`eg-!@;nb zZkS_szxj(2;q58?)p9^!0WJmgzF>vN_zy^WGT$!Me1@8_rk580`zz%rS|9plaKTCW`jmw6 zH35l{F9gtiy&n;^2xpok!c&FrNZqZpSoD5w_TI?;ESzNlo%oM9FGPH3JLn`Mytt$* zJN1mBo2e$$5@$WvX4#s6M9CcYIDfh&<-!Tdy`)I99I1)&ZDWtBD65;-PqP$Of+H9@ 
z3MQ0TIqzMQ;Nr^G$R|5>>I4cG7dP{h#BnxPLUa6;DHm9ptOC(D4jRzbuPeW4o_0Rr zaodV??sr0@W>oJSJt(h?9+1s#eV1)9S(?I5(`#H^^4wWhqpdvs$sxpvk5&DJ-dE3H zZ}&!fp1GH$iXHl;76{>rj#u}c@MC2xiT>^m%igZ)L>2DZ(XlqpFSlkk1{geEOGc1D zn>jop0s{q3h(kyf9ToNF(-Jbr4#n@Yv#!)7d-Pp3pSRH`p&&g}*CNA2+ovsi=^QFp zBbpFaGGC}(l~4!tr3*gFf%D=X=C)_uTWuvy=V@OCpa4ehB3ylR~1*`0oCB-=SVH8zp+;R8DS@eE)tepy7#p5*F*+1p=)p46gD;?-T=O) z%CHB)yhicq?!HEYFi}aL4HFEi%LlG~TfK934v#jcwXRa;cf9SBh+8Y1;C!>E72n&0 zBh_6>z~wb~JuNV*oAHO!X`>JtaWb%R=T@K|9injd+9{+C2wg13z&VLbh=QX7@u5^x zdyw`=HtZBX{8N_-c_ycYwh!^DA-B^+%B+14(~6cw{Dfs=GDKR;<(v&NrZ7Zs!j2?Lf-7K2$KA| zv$3e!dyXLrwS5WMq8d)T1|6SZH%VCM9=Moi4A`kE}_5+v2P`p7lt{QyxBxc?lh&KByBD7V#zP=uXg1M6CTh!ln_=X!3D*w1D(wqT&AJlYt< zXUUcXAcl#j^jomp3r!4=lH(8SgA7 z(}#sFP?`eOa*=0hOJYX%JR7c^QkIc3J4-e%qE>=zl8;`>hZ-tJ1xV46loGPU`oTde z2rzGlq$DLz>Es#8XlOi=_o=&wb-`%{_m##Oz|0W?+~@hF*8SPpV&q5Hno1jW*cX&L z4LZW+;d9rJ^iVpv>t4Z-(g~z}$IX0FwRE#_`i&&>Q+;WY&qD@bpT~TGDJ-!Z)~hR- zepjdlMb+oxQCv0uCm01O$ z>9-<@n+n~pe4y?0@YG@9eWmaA4c95o^ydc?577-BJeKV0~!VZ(FDjHWn40*I^T(!FVijge5H9*q|@1U=Z&PE|5MfO2# ze_1!@RD^j#d>Qje4l)AUZsw=>h~fdku194}g_AP`CtC#dc2+eh3-PbINn;nXQLrK1 zd5MLMi{;zit!css`3IPntv%fD?-*hX7_}CJwy_;n7&bKGvyCml)cbL zV4nt6NSs-+_Y)Hn*Fly$4OJXCEnZeqs&9`wdxw%02}2O_C@2w6{8Y{-zYwGwUYU8> zpT}P#m2^MxmITF)*)8HPtMf#B981qD&V9xP{uhb;>J=Hwl-_2-iy+|yenB%(;Xv>t z&TOdCKeZp2HROIBL(tw>^Guvv01=%Ef@tGvc#JuZz~-_HA#4c&AY94JNKGSlW~ocp zlZy;HcJ`-WMS5kI8MQ&AB?-ZSQu9NDCs^l=uW4{I(ngwyj3__;3w4rEv?w0ZV%jeQ zU&DGi9MoJp-#pmQ8@9@96b69)!huOU@HQ~*QxoW3k#^K$fI1ThyhJ-$Zzs)8!t<62 z$P)}7@;QT}ekLe>ppA}{cT(*pkT+s>e>8i%Pri`oP4oO?42yPbIJ!EKPPyQv$HpF? 
zG6W$m1QrLo!?bG1;8m>(AL!+FAlg@GX+OPv67Z!rej0B}hn$G)l<=OMz`Il}Kq>t1 zTcaH^V_3LEbkfP;a5lSjU8ky48n`2_l+&JGf?vcR1mDqdLRtvVerD&+tvfuL0xBGE zy3MAW3<2G-XA#-ffRO&|G8Ud!Md?M=AC#9>RdGdeA^KXJeT)xMt!>6x|3INr4fhV4rQ++3PfX#g zM%H${9Ig-hqx$q4M^aMBy|FXDX?mAH%bhAz6sDA+fH z@QFX3yC+1QXH>l>ZDw;}{H{c7(Au?|dX2Bp*H>lH7ARkXM&jRH{&94=w?+f~4qpaj>WM zLog;Xp6Erj%+vBu|6uFf6~xPW=It(C_V{&?{-TUMX~9no-Y0UeS)~R|21m2za^_QN z0X25Gu7rlNQ#!-0=yAz1O;FL%PjP-a$u~=T1F~ili-33|VYdG(ZAWG@S8 z`S?S3zF!tg9`ztlZ-YUXN1Mf`mq+D}uqCDZa#(i@oZk+ojjTZYG3Nvlr8lu6i{4yH zW7{5=(B=*j2xC7B99Ym1NsM(Dg$w-WzJ?#2kNuesOG(EG37VIx_b2iS4>;X;AU!W- zGwH3-w6M9VJ$=my4*|O(KwQ6OR!t}eN!E>1J++IOwh&{rr(OI=?|*!HJ5-6c@oeRz z+=^QuT#p7dQil-EpSww-?s>FmGM`<+L<3;Kw4uxaObaZXp941qq20eT?heu`HV#e! zsT-mns0IDNm0Wu^Mpe#3T`o%sDm7|9Yx@cxzqZFXC_*}SB=Le2I-E@b{O(+xZ1~4j zV96#uiib|bL~-Q#TiXrp4S~Ht_ZhqUZXnWqJALZs8@#gQ+tq}DDqJQPS#_#DjJP5j-5 zTKA9cx6b#D1ubFR@9;7RI6Rpz|%;HoEpD8UT!RU=vD z)o8UZ1HUz^A&B=_6+L`?PQOtcTRhG8L7(K0E>3>4;cYp#S%cMG|M&QeM1;kQuZ|HE z_+(9rovVlfO8vwqNtLJ@E%!~8#8rnX+lOy8xc62v zCI|>Exx+eZXNy0+DCTGxa+L(%zy00&)Cd!!nop0CWTS-n=Cb@?zl0+TYX za-V-&MeB%zNz(7IB|(lHBb1U0MD|uobnPNVCw6+a<*EueuxPLpfF@s&oYK&{Q;9_g zS>MJN@cM}0n-DlDe9@@}QJn%JEK~amSJmS}oT1uF;rMe)KjaKm`t%wD#}J}Bb@Gp) zr+*ewXeQV32Bm~*9)q^rtYL^B+zd{JMTN}x0pFk(^aoG{sLVYgAc*Fm)Nc~<5(E)T zvkr&A!@M)qnE}}fV1W>qQTS5$8P61G!crbSEo|KX&Q4O~&N1myI7>|0OLmU|E%1evdRR>XR83^Qn7D9|T9s8}}pGNr0?K^Kj!y z5IdxMKIepRW%9mpwf*O+g^nUccF?ok?6*#GxWKv$im3m^I@Tm~3X#i%;PgGn7u1Wy zyXOxMGMy`v5$^)7Y46giu)H&BYl1w48B8Ym%pEM@3uIpqu@xzw2ZPe#oYFH`5p)1aOqHB9@IRi1rYj8<7uW$Cn(iHS$T&f z78jkpWb6LZ*v>g1HcZ7fd>wKU9c0OSjJIQ9LJTb3Y_7k(LY7p4L%9-3uWzdCTS4uZ zlMSDujn#%dvJX5iF|+5!G(2q)e91Ggaq)Dli3Nc>bU1MW@&@Ejfa1%?TYxN!);wG+{xjBDR28M{J+ovW=v0`q~23|bb-bLE&Iqe2a1>G3uEiz73*N6#E2qR>UL zo)0Q9Z}`C4`Pq8Z1Y0|?z21Cj+JW<$xv+7MY=GXF4}V$MSqsvR)ZkT9>z%AS(gmt< zeIHeWAZ4f>mk)N6xwb%&$Ll!4*ll6jl*JtpuZcp`nGou}#QfV_6$Oak78JMS%j_*k zOVRW<>bX27Ub#Z&o+O^LI|uXW@xrA56_$y~;Jo;;2CAqkJL&W_$a*iE4_dcX=K;@_ zzbJTzAKFVkkAG$PsiB%rb~6J(AT?-PXU 
z4G_22XY5N6MVOfiF{aB&M+Jf#OZ%%Ahz$^N>a^v%FRSI_F`cQptG)+S4@2a4HQr3| zxo_2l&aG}=KJGZdWwovzA2^Ca-!!I{OzS+VOBqMC1{g8&Z!T_hKEXu%DQw?lsW@9w z(r)aQ7_!aLK+i@D*b4yzw|IS9#FT7=ru!PcUZB09Hs^b?(R%$?GI@MN|2ZUMW-s1; zYk%+01|H6G^jsbX?0xYPrkC+M1k%;f|{o{!j;Cr0!_^J&XzRr19gl|0FNqgF5% zI#^pPwNHM3NLDS;dgOP9{ZQbi;FE{;KDnbgm_U@DGGBmmPS*!*Za=}cEwEIsaTC_@6;O^&2M9uqGTOuZL8w1Pi+Gvw$aXb<->FCh{JB>_iJB> zF;m)|{srGcoa5kY3Sx_OU|1rqS=~-cbfZ?WB_4aF2D(mcr?9X)TE6$=;~B5c1*Tjb zV|uE;=FOgk$U5QhK@a`003Qw!+HAhWFe1Ll@ZQ}mBU?K^(W?S^_iuZn>FAh5k z+M)lEqEQ?_&=vq1;QrTE6ij-&4FFV)4P9FCCQx55Kw?|U$@}06ro>P!mUG2!YFOz< z>+ws0G{`*##s}~abG~1&&PJ53km|g*5x_+ANR7hr`6Q6DVsEj??`y>(q7C31Fhn~+X)a|=1Hz=B z4I&SudJXkty&gkE^m-Dw`uxw9u9adY^+r-;6shUvRyKmS!L7NR!r=q$XofyFHWo#j za^Dz!tE#acMh!e@XZ(3(ZXa6HkdWv>J=dIXUcas)uG)IXM_+dtCGo9%v^hmqvPxh<7bXD$Q zqE+pum9SP$e!WIKFh-u#V}3 z__xd_TgdRan++dpNc1+7L6o@nH1?DA_$vtv`1qW{QJZi-GocHynzu*@93GnQVXP8| z^He@CHfaQ3x69q7h6AZ3hw0mWjpmq@zuQW4cm4TQq^RRkFAG5j*P_Ya4Btaai(DOL zY`0f3e%<&jPV*J(iAdk`-E2f1{p*u^-TSro9c<_dr7ro4pH3|*Rh$IF2>r&y8B(Za z`m)QI3~WGw85Y`6u>9&nr(#$SHdg&-?dSM^N=LBH@E^BBiu7JdIKe^BTVQHSjgWyF zo7>SU^AL*l1%y2j`*T&`q*4Orv6dazEfxdyOT;PQg}%1YSDi|5tAautYh(2{HZJ3V@PIE)K_149K*bq8MOn)y}t_14{h{#ldTZ!0ONK4S(1IB31 zhEFvOBZE@Q_2<)$=dRkEqdcZ=y^9n7HQ;_`L%5LOD)Db$j`vS`X%G+#G$2LtK0s72 zUf0rVgu}L~>MS0D?rac?h}KY(ViM7H82zhV``M2|NvhNBxt#hR4YK?odN8CIVC3LS z|H|)aE`f-BC6cl@dBJ0iHaoaeI|2*k6&rf9Md0nSF1E(EsPm;+ZIGf@fR9LLk)aoG z8SsgO-*VS~py`UQ7RR%C#6uvmXJi4{=>shetbezA_(S?Mchhg zsn8H&3X()F=^H0hFaR+(Bodu9Rd8!(L5G6H8uS&Y(_25^8sITZB?~tmF_3yNX{&}6eH<xaFoUEo(%9hEAnbgOrDT8C^80zF3 zZIdpx)sL(Sk~G*SiezMNP3B~;{_*02kd;k;baz|x?itP^X`B=-G0KlBq2I$SnMBY} zwLWp|%H?hm@}@WeykKGHhTOawTt}GW$_xz+0JIPc_Ot%=IEKJ#Usnk-K!kJ~XGkZe z(E@JKEzG>k{Sy@7gD8(5qtf@h`FmUD4eJ0}T)yM8xW}OoSYR`~zWJG(`l^|_M59r3 z!V%FC%p%+nBQ!)P5;7%3EZ=KgRY4!#H{lch?$Zh=92%jvr978~;=X%{ zlbgGd&b}&u{pvGN?J#q4;$DBPB6I1|rSkdk$VkLMX>xiR116Kh8%&_N^5K@y>DAcy z_)r*gmj-hs)wtq->7xLQ{f)Wj_AGpS`2ryd5bRvWNifl5pI0D|j>Pb2*W7u7{29~l 
z6}pfu8Mp-AAjT|)t+AJUcY)fryirE(C7J#&J$K_wE+E*(WeBG9KIfSN3(L#*D?5~t zuz=)%+kWXIE9sp7*2dyJeS-%t`>lKPZ%8Ae^46Z77Wl>_z!L>-DFh&N7$Sc&dFknm zyD~8hl5bs?w1G)$zCKY0jR}t*WA43g3`Az;=JZ#bSB8a)?~G6)QjIo?`VT3xz0g7% zt#Bo9+s+`&C>;ux|O?!xqm?$7$kZpo-8t z?8^V`x_<1ELMEB~L35x-qWot&o;Ebiu9Dp@vg|^%AO-g0Qea44_^BNHxv}18M}#om z9ZC(ANl`O__dB?{!i3SY4<0@=Ef|L(m}n^AREfR%Sb5`qWo4x~IG>^X;Wp^(F(KEr zhyw8b{s04%)bvEc4h6I#VF+XyX+#wPsDV5dHhz{rzx&cEzB^+40|_M(JU<7$R+{%b zl+5u|TAsOy6o_~nKUZQtS%GGBk?{&Y`B??(!S++v#S8wEPA(<`zt>^m1M|$x%uCaT zVEFjAw2YEoLMRi^yrg%#X530qeIcc7ZftCPYpg;zQi0WNdC2tKYfUcYq>+)4%1?KY zwZ7n%5zs`01O1c$T*p|0s0?eB_+2Xj%Y$U-%LRh6`Qt*@kjq!MjoP2&h(BI1Aj&Wv zej)P5L7GU|hv#w;y4E#lXRle-n+Sj~oj-yF4smevC7y5+A5^}r-#HqG`KGs!wc+jS zy!igEjLblANIZV28?iXMpi?+*FYBQ$R=TYqBxOZ;`8cts3TbF44==A_`uEp09p;7C z7RKL8BlhX;Up#SMfJ2A+)BCVxe8BY>r8aky_3%R89s2Dy<4GsB)o1tVX^&x_9OtSv zUz%Fi6jw#EO8l&?r~^co_{0M|pb)}AbQ9Ht*J}nPy|yMaF!x5iR2${>B9>=jH;Ab> z3sYH|i2{o@=oYDZI4Y(#*`U`poG@A+H~yb_{bD9O2x2NK927Ws6^P#DV4$NjSTwAu zsrfN_o}ImUZfB|~0uiP8Y&1~~l{<4l|C|Wc8Sout(lsW58KMB>-43^lDE)V~w*~J` zH=p>FqeJxd>(`sf9VAXpPQX#?7m%>ZWbyc-=DCrWRjn|9A?96a_A}v9;x%FY$ge6a zh_k@T)53GY^>`$ApPZsVpH*m;#}LBqu)`Ys=u-CZPH;CH9Ni{4Dz&yX)iH8T-U&Ew z)a43Ze78N0J63Ce~J~)SyfGF=7rSH6F08hgiLcTcHpBH^|4Qqi?RPsNulgL$*W*07eUE$ z30Ts>C+LD>q<-Oo&=TqTC;M*ewOR;(v*Bg7Cna=1hM*R(K)3*ikBo7(mrPo(3D{1> zOZ!P}uZ~3k@h1&TrEj)R*9Q@tpLQ4HfYzqw>cyNZciLjDPbLd$l1SKRR(W)PvP<&` zK;!A!`Q?EPeei&C-IjaRrGyuf&FTT10{-KvkH0C-bJkfP!=&@PluZ%JOce70ES^;Lg- z;CR$sF+RI=H|x=kvFxl?Uk}Y+$FJ1(?6~pA6=h{86h1z_ZM2OLEy_P9$0lKJ82Gzz zY_9{sb|L_W!TV ziO~;O8g&)sGTJG4gMzND{Ruo%Vriix`{b)&z9Gwk0tP7*{gc{xb!JuNz;Zf4XmRe} zQ|h}SQAQ+M}{{H>)(MnCXhJ)${>m~m8N43sPzVs!e)!w_(m+*5y@Zf2Um%V}w7R-_e!haNr@ftP za|tOP)IT5D?gDTp3`DEi=p0YVX%>HQCwU8m8NrGj6E$-xD26Ul(SSkDgB(Fwu7LiJ zL<&W?ZSS9PX3mldt!pr))ndZA3%A@bsXX)6WVD)y=o2Emdm-Wf(d5Ye);=rF=CbzX z6nuv=$7yUhB+Z3ufRBx;WsQ;0$Qld_53jHB-ThE_QyJb?&B;Jaj6#(;&EkWKEq7b| z?%oB|OW06xUcN|~$CdCjmJkrf=^Vv->f^HDAAjVgYqGd4H`@7;s%e%Kx}E5GzZWf; 
za4){hA#9>@eV~kqS=MSo84z(>A2_urDx?j7$>{nfck^FboUMW61#r%T2RzCj_A2aP z@+jYLZ5*ND;g{fx^}?4--2v6?2XLG0d?7t7B%vADR-Pgy_hZx<4BWnqdG5ngy<*Z5h8^%@7ol*Y4@fI`R1 zPiQ=&EEnsr#b1M<~kF~JT_Q{i*IAxxN(@7Dn`=>FyZd}TFOUq z)rb`>E{d9iUpC;3j}6i?)`NpxdzXa|Xdt;jVk11b!_lbEMe>z?Ga{*1?;YTAUAZIK z`#yyLEHR@&ITiuc;?7sw^N%&qoW8E4Vv$@-{&L?i2~R9`)<-NBH>&P_IX zP6@3e7?ZivNIeMA@E^Z4EBiqdSi6$y?|1MM2g$<{lade_cVlz28AzHgxGs{@3f)qh zD`)WAb1c5qi;j5tLlh@Ibt($(!X&7{5e3GbRY5RsrWbc3gt^-OOt{FIeDajoeR${8 znEQAWj4Ga-nlj5rw9n8n!2OQ}uVJo@y(>G(PcK2<&&_)zHVKnG=MtRW!`N@k zyFX{2I;|ZVv1;YGOA14qcCr#mywhBC>_xL4Pp&ntFyPG&epC&!l=_bxoQ3P^F)v-t zg%7&B#>>lrHxzVZ#Pyqkpf zmM=UH6PUw)tXdskrdS$C?e7_$Ewz<>kV9w${=bj^U1RV5k>7`$rqz*^O6Di}X?@R& zTqMh}|1PE1_sHl^PBR0sGOgL_rx-51;VTTDpkv*CMytRd9eeTTlEXDwCRXkq%;lc4p#p; z^3+9-+NwbBfCe8_BS`H4#w5@dqbf+yC#RJ!%eDZx(HyA5E_jZ?n)R0sX+($jEcM%= z`8ULo+W@CQ|H*~uIxA2hQ4`bC(-Z$ePOPQF(#Za^T#(;R-6cALQ~urGzipI1n;7}G zTg%Zi`}>r@1xWm8mHfNsBoD`qL8t%xwdLre*5QBtnuYt(6a3E~W2rn|Ht^T_MgMbP7;ful`TNIH#g5fY9`i}=4_>^8f#LCp z<6~8bg!Jj`6K=hG-AUrCf`Uc(G53$ICe!ji zU-1l*a9UWfRk*E$13}Xa#Hxq`qtwBUIhcPUBDR~!hanoA=oO;r!{8%LkoWiVY~yoronn*O8A7Q>}->BaC^j|oSTj-f7q!T zp4;va^HN&cb&x`gCs_~j$VPoxULX%$o_ZhtY+Ra&ogMC9 zYW?nDp8Hd=%Wt%0(|Rw6MQMxy{(<`N)>O8I{Za{b4^1|1vWTWJ6myXHtI4ld9TirF zydnB;++1kgp1ycNgaXLxo#@^9_|0!h@3ZWB%9X%Kn;c(0iq8TKC7l(LcD*cr46f^9 zmmg8e;yioFzOmeP9ryuFEop4}ogakxlC6{?9LC;SQG*O)DQcWtTl**iQtL9Xg@lV@ zcTb)ubC|prD3~&Q@Ay{nK{bMWspASW#e33ICVkQy_o4avFsl>2v7cNte86)0+z;=CRlyXt|jmUUrzMMSJ!7 zHI&6JVzwjiPH1G22jURDQ(ire870S5&BHGTF1qnjzG5g7$b0~Nls8A_w56t|eyDV} zM@mx88Zh8RJSI2idrITa^79h|*n{XfKye5rA`#7yI0Y6L3rT=t5%RyBskGxTl%0U7 zPA;#=@uvr-X;iGwgUlKNc#Q@AjHLIroUJW)G=n5QY%>IclL^F@AK?QQvwigzCE%g= zwQ{ueI^&X)qaoLv0RK)p@lJqA+Pg z0plU+!5!r<+02#psN4QOL-XIy`Sat?r~Y5Z{_MsI*Cl?7R$@$S?0PVG09>ISaH{sd+Qi*Gf<%Shf$NoHiIWY@RXC*m#RMlK;(lq3&3JvaExXkVuEvzNq7HqrF(wb z^)%W}6MmE4RQX#UUZO!vf|-ON08NUE@nqM*(=p$yRv^X%2PIK4$?TN7JCiuYc4M@F zY-}y|H-M!Scr8pU6o=w+Zby%MbBZfacIyTl@ zVxt4)0XcSHo%M^ykI8+0@Bi@l-m3`y4Cq0Z(BQj&Umc@EE=5k=rS_qkQLRnPAVdFM 
z_q@TyNB=hzeE1ualaR5uW?w6)(8gX!?Jf%n*^N>kS^-56Xy+M*;V}9TgXN|y)VcIY z4E+}oxyd;*Oc?o10B}3v^su+H0UlfM!~1D{Y5FFxkk_}jAqHtGBbL7!S;Y%$AfLem>hHqwmu=f)Vi@L!Zek%*#!CC^=xP{;sIC!kPUs-@VPaRZS#AIaV zo6XArv=;dMbOJaXk?4Y;S{~47Bt>s+y?h89ReOs-MC_Hyulj}$X)_hzGDtNd>Gl2M zrAz3D&K2an=HLjs0~&4~sT*XU$?_$Qz2$0G=yCzhDf8TX-L-+#_JBxTR#ujUrN!^E zpauq9h!g}t<#_1s5|>5!OcuU{g@x`Ywb1h}4}d~GRC~Mo zY<7v9?2NRIWsvk@g1rr<@$Rtn8t#0lq5)mE0E74PxxMdws-ZDp8n2`&i!4c1$t`k1 zYzzy5ftwm%0hbK`aTqy3O2Uf-s@X>1+B>nf76WD_Q}2K8=co0h(86|iysVOrMb z;PO=iR%rDtTaeGK|M<~T)FVZ3cn>b&hUK%;RkzvKCXc_c$^S)NdPB7Tvi}(X{S3gXMgorQNP-7 znl4j`+)qi5<>5<+WT9bUj%o^;be`VcBer^q-|Ycz&9C{z^~HNlk~2<8F+*1FM*_Sr z1?1!03NMvD7vCm>`vYNbenB%cSb#Rl!P&Vew7jyi!S=j3Co4oh%3%>ooEh~FGE6?BL*4bpn;yLiG_tVROKm%NPL9u43k47g&=^qij3I5j{=K1 zjd?H|16}zAT48I=6Ev}DT62l#Xro{!G=a!<12EmGpYt#Bn>*Vca(R96fua)@@$A)P zu}pzA;kvpy2v7(n4~tAKN-vgGYbH_DZJ8P|8i2c7-_(SamzNh0mSnIH=p8U;aJP53 zt3dCKNZ7&HA~!g)t(VKGTjiM$p5Oi!)p@(o(5fm4#4;GHt(*J~HW_i~(MYt?k(qBW z7$-O*g99``&(7Zo2McIsNN}x3z&QvBO&HjO7hx7D#H*_6>T?bg7x&W%9Ub@ypf2Z5pPo&2Ms8p#TtQlDYW5i$r&r<0f}r%1-@THqu>sG~ z?c(+fCxxi}qu5w^5fPCM=+dQfhEF}WuQ`K_gCox3>*ODvC7kRN%So*o7x7fJW`_vKMjVCc~ZzfG}!=BlOTYdFLeN&Ml%rJwxNMa+)SiM_WL zwdQ<7+rtNq+h9N5FPASZBD}YkN#;8fSlefv9HE@(k2^1_E8qPb{QW|~sEy~BI05P6llB9j3do-r%s};@ z&2=?f2KE3%esyT3K>tF%01SO1P>oPQX)Ax3(T(%X|GAixFNd0f<1R-`E1MbKj(B>c z=H%q;KtDMgf&#z|b;?kMdSlRyj+#uTs;*A3b{1h{NFVGV>)kUaC($V3w7GMB*G1 z)8W=#@v@Z@ra68ZaZWqyrX>F0g(9DH%}SVojEwcy?rsFY*pq#dY~f~K=R4m^f^j0^ zftYhfFuTn*Nd&AS$SClGJRn6pclNAuXL~y-{OBw;7FHPK5b4P`T>Q@Vcy~d4$jL<> zZrvVfo&Bhr269+rMNl5DQdA?@Nocx1(`41Zk%q}dMns&w zevN>V8@4PAFDeOry7hViPQ%=2WT08$j#-5e^qF9r!788zd^9=U7=~5m>+9^}N%}o1YJX^vN8GWF%#S+@m1%Tx{EY#0DgUh*3d9L!)>85u`qPd!I(=Q9FT$zvLf#2iZ zqB@sN2dfkky;|wtd*%gW*;+XW&j3d8)hu9;_g}r~n@=K70%4l{*|z!9zL`f>i+L_7808q9gWQ;f+<2r;~1D2@TQl6>$k%J7P=#ac<0N#X- zqQ2n3^jPVpy3gXLtf5kSM$lv%fD;OWQ=ndIa(VV;M4$bd_;sX|58TdzEW4h`pjv$hb*!lK+{8u;qg5|`oX3|@#J}eILN5knL;Xp# z{b~>&ka@gw-ANHIUos&+xDX_oZ1Ssvf!qMpp;q;;r0T+j{aW7=tdi@%;RhK<0@EyT 
zVEqLF%Em^DhkS1dD0zXCWhW}}Yaz|S(XlSisPg6D62Kt20IKX=08^rq)y6Tf$Qj`H ze><{7CW@Xf$S2Az3ky2*p z-5olJKZ`GIvtE}CddgaKRCgypAv6JZP}avs0tN62`bepL3#`~v54rk@e~4L0NJzZ3 z>R~{tOE?OHii!lAqfd2E?rAReWHdI)A=Mg$Ue}9mnq*^3i3y_4?8vQv$L#~1EF{qf zADo@fAq!HZ!EG^B53}8(5T0B!Zf6>CeYCCi>SbTi5tg;fZ;DeT9 z_PfN;MRX`owYe|9#4q>xDS&ij5FO^j1KJFNYiRA2G!>qvrnvNhT|yD(8iMn(oeM&!Rz zKX%i=kYnItAthH~pa5HH_5e(?f+cSP;GTAq!tL9)kpzhe7ag%>KtU-Q@Yz=t#n&Sv{mG4CWQ0nC z;Fk?f@vgG}9b9=N5O#>8a8?n%@V_TGFVFV^95Xfju%u-hA;?glLNw6PjI;)+uQ!q( zuGY0N)J(vg@jv}Mt;~`;`>BCm>J9*;e2X3gDE(Vd4zUEe(>ZX^Bygcup%2*K(b2&! zwWg9|n~0?&_$!mNEDG{mTwULy*Z?xbQ>4W=?$GRsxe202frGo9-l?wm}H7U!rEbcrRP_bE0-?)e)Hx{HJsjb z`s={k4*1GKEKOjlZEfyVjA$*TNj;m2TRy;rY4*TD(S`0%ftQO5nUGDI$;`Z#3sooYhyE5l*FF)07yR}a8^n%rt4(4fF zK7BO@sah)y^#qaC3?V(|Q1O!|M*MQN+GvEQ_f<1&e|OYUPj0%@8MW!Mf9>s4*B)Hf zm}lQ%@9A~So-?$Ya(er`H!n3UznVO08yR``dH7oY$jvu?ZoFM)<|ChA0IAH>*w|Q~ zFA=iVOnv6C($r|6sq29Q0mG9cZOEOC9>fz_pzs1RQYtDl4S-2cBGa9wpQ3jck+)Vv zQ1HZK2tP~j@Q5`%7@^k+8L(=`j5aMF(7S82frH+cz(zxUN)swz5IMbn&*t-$xlnne zIVN6cv)K9_uzc`HL1A=;$ort)f$Yy?6T3 zg)bJW`^#`oUpDZEv*8@?%s+u`)DIj84Q5L!nYm;oS_7&ma*`2eg zh2bhR8B4mzl|O>{j?SLGynP#m(jJSZgBIfsPcLLYL*K@TIgG2%!&N>=pQ7vFLcBBK z=>u}%_i=h2HA*vJu`Yc7>HB}R1h*N^`S&lP6e*N{|C}g_ZhO-&F=~Odjaja9zvq$M z<0YXDjbQfOMYHD0clNROE-biu{rdG3!#n9PV{4pX`N(5Dho$;kG57o$yg9+9rEX#xJggdqqa&(B$V=Jvn2@iyUQ!Gy0zbe6<=noc7I3#B2o}A3xfco~ z3hbg0u|gn^MS7n*am8GS8Q-QfdkXG>Pe9;ybF&HJ3?1YneJ-z-vaS*{%5RyN)3yP^8_8dLFaD}`PbN=H$XYk=FF|+WIcJ*J6 zFHlTQ&e+$S@6m2Y3yWJRfQvX)QLqvGkpUvj0mpz6bz59NQUKfi z=iZyLJ%N=u3DCTYk8LV?^F|A>;t{l5>$>zKI1?aG9v!=1GGMowYl9&C+uA}Ll7%oa zIzA5nSk%;X2tcg@-={GW%XQoYrL9z&prlELgwzLxh^tpu6-dj%ujLLxi!CWD+wW`M zjV{TVtet1NuksX>LlfznSil{{g}IwBZ@s|EP?`JvX$)LKkl|ahiD6OiX^2*+@8NH(LFQx>ecr0KXD|u ze_&u3mW`ZqUn(9i2Y$Wrp#yxOTmlTYF`Mn&ABDTiwr$$#EClG08A=`!OpGNjj;@edr6?w`@c`+0?l&&A)q-*Wyu3k!=Zpjj%F zYF!?q!i)!CYT&GZT4)N<^K5T+o!(swwL_!G9n=72ltn99oGF0Gl7-5A=sLdi1^r|nrbdAXofroX>` zB}hFVA0GuB!YVzsD+_0NPM)RdpvJtXSmUe~)IKAe*{!~Rzy7IH=|G2+$uGBVJASNf 
zeB{V#*)gBvB6DHz%8#WfZqxAnmbkV*J@n^eYEaQJH(}8(b~uJuf%zZH_e7Nzit)4D zxHizxD7*rhEh-H}CzA#BBZyLnzRHh-uf?l}h$jOP`9z%rp>UEq9zQYXIUp4Stq4&j zZsE3Gl!i8^`p1sNqhb}THgC>hZu;ToTxTtauvLh1WJZS}Yd~DjF}+^dOy(xjXajB| zJ0zmaGTWv>2Lf^^SqRX0)Qi1@AK?ltw*6Q|Zp*^v$uBN-Q*L;f=a=ctzvRbFt%DBc zOM10p(WyhH&^0g!MF8JoSt_KTM1LDQ2{|hUxd;vj*CY7;R0Ec+bqfe)edEm?4tqc; z4FC%G`1$pFzQPDVr{2=B44r5?dV0YH7p!O^+wEeYxY+c14|abO448!_dmB84&v+gG zR8;VwSt-u&Rkpl$(A?bok^5*S8ga^^wg@E$AL0TPnGH4u%F5nIT*N~%8-}Ygn%c^B zod5F*x|2+Q>;|B&@AqMcY>yCGYiPeMce>+ZTB;Zjf{46J20C?58 zjj47aYNjZ)u&Hpm8F`1b+ zF{3!;;>F*n6p9tOL-(&4biw;NTL>hOhbdWBrQB zM->9z4AF!X&St^m#=~J4`r*ofQfT*_x;xQd`rGaRI4cd^-%VY=W5+>emdhuL;qD66 z*4EY>YJA{IrM~jkH*ek;yuTv26?8s~;^$X23&Dy~WX^jkMjmIX+yIwI2jk;lwli50 z9~2I@f1AuIv%Ci#Gyw#8U$K{p1EjR&JM4F(DnAHCcc|DXe0jz(Kd1n~jknazj=BV^ zIbXFbV)I`1#NcJra`tfO^qElcqp38n@uoJDW%KZcd&k@J&%qb!z#O(5$@lilVFTM! zit6g>VozbG!lL6Cq9Od6Bb48bW=|%rbuL`AD3pSKnxH|nW0IDpX4c6$@*Cl5*3yh0 z*u}P7E`p1kHJ@1@#AF7ChTa|+u=9Hav?#H2=OIw5p18jgXJf4@O7}n2-?tB|K@A2w z20&edw&GaMRaBWXJ@wkKqFQ(jQ~Pg&#R$MSoDC?CS5I zD*pmP7DVxY9Ol%89&2P-&F|d|+)k^6(iDSiePd16KmRbQZtpJ)*tTJVCPI4gt5@aE z|61DXIWCKtg7}|eoSov!BdpVNbY_hVxFc(ueL3F@73meuN&D_4dU(hnCtfBuS* zEjOVIGxg3S8PgPc?D@5t;W03266tY3BNOo^QRe6izQz*>6l0w$r;=cYEcikK2q6}u zxTveMlT=~yk5BlTTOs*OX3@mthVlJVY2Dpc$TcOSYS-Sup$cMK+0@HVdXd z`uR*>^A1`ZoA|l%rYjTEc=P+c zhEynqCBPT}riGl(HHKzI(mcccZEdlVt5>UF6v)jMmzL^bd~oM+#JPjCjDc#eaA}5Q za}v9laG(aFg$`;lYS><0lSqa5ys#DWoz1(Vhwh9`3j4#QRp30Ca3M2i&Kv@v>_(S$ zMs?CPo&uL6JYiC1wQ&2mchR7tco3|^C_s=H{QSVFH}cz1q7p$kXi&<)aVuz^olLa& zMn!~2e0vz1%?8LpMye)JYh`asZe6VHo;|)i1#}~$aOgR=c@@7Z!>bzr-fuz85sg{H zr~$l0%#0`Ssh~jB)6;B?^wp~|;Jk(~KrF4B@(Kzy=rw-a(vmUK*Fu&To=r9K)w!N+p5tlY z=u(#pZvmf(BX<*|#R zt>N0wjDN8OB+^8BsJNu03VB0(??%6R%KD8P_mf|~erR~nb42tF! 
zrye5@X!7h5zMz-uWJ{|=wGP4O$e2iM+N6W30Op|KCUh6|#^#zWXnqb5U6x~9fCs?O z&!2$Yg&5>i^LhD5m?t4;B8MDPZ)E(0!s4mXH(tf?8mRD&Lo(WimFw_6z^T90v^yI$ z3fj=Vs-gJONGOhQ*gWc-fOfvO{Hk}V<8#Yc&woTEPWOJg# zw(Di;8XCF~akd;U=F3eJ9sAPDg4)~l?w!2d;>vf&v2nWJI3B zK0!RoR8Yu|P>^y4Vc6l>a0OMoE6Zy|?+HE>7G5 zv5&&mrdxn9!C!?)c@?Fos_Nb=Cw!DosNm2&qYXKk=R@Wobb?zhdjeZoM}yf)4&InG zj~fn{SAL}5P)N+SbQZ|?f?nOnXB}&jv{i8oNhRDV?&tN5Yr$+|RrC49h=09wL%ohy zFf9GV1Is57N9lJNcOgpU-EOcf4cR2(Ta>Bvc5&vudWL1+A5G8v7s8jP5>*)l#vbY0ne+u?)Dj|1l4^Og@28o0^PG zr4pd0m?kE;EwV`3w~o5FxoI#Z&7SmRJ#hE%z`5fxW#KY$rk6Crtmm5irb8DNuv{iwhkP0ktoqCuT;7RBIMpKyJ0!ACwHSl(i70ux|24)Qc(#BUKQweE zv9KCj?`S&q_Jg9!U$(WGp=PKYUYdw8eq*>NrOHnW9KhpI$JEn(7(06V>1R&q*qzf*jE!G>LPANP3fR}D4ZY7G?8c2$ z)H~~^8SBGit^e|H^XAPV2%HE8jUn%WI*Bqs0TJ=@M568*1?)p$+~nsS$cyLLV6_R; z@1HVFh>wTLtBF;r>ocEU7UwVcefc~zq-STBfuIc@OGS0n-T*41D^3O>P>^^XBnk+B zf+$%5%`Y39k5S93iV^{@?osI7iEw-3@b7Th`oCiA5Xu;e8QB~V$iQU|=kU?%rKGfG z%$VViH4Dqn#tfs-SQm;Grz$ZpEzMjADE1nVC%>RzBFcWjy$vEFB8bp}vczHN)lE53 zVgXQsKB6=QMiy5Mq%b-*#=#Ka&cY-Q=OrO#))%g#8L;%h`}N^`s}nQ?6FmfeJqsQQ zsD=Yql79FJxb+YHUQ@ZSK)qB?_}U=KMu_XycTwbcXb$g+YABuS3Zx;#uT3+}HGnil z_;xI0#bmI2UWI_7W0l}0vItl(0X2xQwNR?#5!@Kak;-{}LvA(ihQNJbQgwBISVBl- zWD0V1*q5&b$Zd#_tvU`WiU*>FZ`Xw~jB{FIvCh@@>|sD0fU+W9F^bf13Q{j*>hMU^ z)zvG31K;)c-+|X)Kr{mtcJe?Ji%m`4uyEOSI-EnGJ-Y0Fe+6GMf9y4R1R$*qqVeF! 
zNZ9@R_fc=y&9U>}hvkrl#R+6`gLvsn5{!SY6!>`0%xw=vL7Flzxg83FqE$W z<|1vMgc9PkgZEz4yxm^U_G0(7Ef#%; zFvRH1dU;pcn$$z_SQ S!Q3u#5l!vD?(|(p&i@z1p*b}G literal 0 HcmV?d00001 diff --git a/cpp/tests/neighbors/ann_ivf_flat/udf_results_with_jit.png b/cpp/tests/neighbors/ann_ivf_flat/udf_results_with_jit.png new file mode 100644 index 0000000000000000000000000000000000000000..78e11d0ced9bac9b43f591a6387346ba7ad49b82 GIT binary patch literal 293432 zcmdqJcR1H?|38ePVO1JLQBjEqDalNWhKxk^c$1Y~lHHb3GE!FAkxfZfvR6_@l9iA> zL&p6$KlQn;-+lb9|L^0t9mlt;Zy(-X<2;|w$9kUk&YeBMxO($yDk>^Q`IASLsi;;v zQ&G_tt)#=h` zdi($Q`shm?arA$D(Q+)4x_9^g{#q?Y=e+KJ{RV4>8Sek_`FlE8N9F&==c^V9bv*jN zKmKmKyJ6w3|MB@s)Mf69|KszY)BJyb1FHibJXjO2x~GM6EhXaBt5<{S!{NTD5lIQ?j<^fjqf8OyYvwzuMh`YpgC^y1PkG=dMG*Tx3;*6piZFuU~C5Ee;-h zr6u@V#b$oSro_@3fBH=Hjek>CS^Y%0K=JgMGx~;x%Pw5F@F_`yMf;6?QFVyO+V$(B z?Un*PU)nD;Wc$vkC+FlG3JncCm*?a#IoQI&cOm(xk&%&au>52~gZ*&%jES)^-HH_} zR9~pBBA;MYuyFgJ)wl0go9qL)?MRy@@>=Y z+qd_Lh};MZ+k#6y7k74x{m8d1(u*@YlC^T4+bxM|pXwivv)m$m;mOvIl`|$rM$0ud zH3Rl)ZXEiORqRPiKRQ1o+V-;cRs%|9!uXY0RfT8c(jS68EDapCvaaNFUqAG*0$ zUM3|SyT5U_T9SJ5@8e6ocJtKId-pQR%gf*O^)1aXsk5H?dETNue+&M4{L#)c=Mt28 z<`;go&$iWQ$)7wanu+}ZjgCIcTVVHo!17w2ce-!S+%$mX=ST;u9K%u$>9gL@7S?+ z^JcG8!2r0wOQe1ONA*-aYGq~R@!Z5e zYcTqjlIvCXf3{^`LB~K!LFxN@EDr+$xPy3h?K12x^8!6 z=A84_9J>|ERxrJ<4Lh9XG-W0xCieYm@FX@XP2Tli$DbX!^&y>7H9yQ$ziXS6)K->V zXK(uWFw!w^omUF#@9tj4$HynH(e}ncx-#7^>d39ttcPr)qpRE6BqN+A+Q)x>=37oj z_xwVNj#q=+kt3<*P04;=`Sz68Mo9bN7aHPetG{&=s;;S-$UU`Wx@y&`$5Jlh_~ zI+NPadPkoF2Jcrse*E~moZ!;aPfc|;Ha08i>5FgCZyKE*($vTkF{zEzFTVZSx|3%0 zCV?co-)t!&`xDMRJ2EYP=Fh4U{?AiBiA}MKe&#K{ceuU4&$g%1G*pqdBjbeMR+NV- z+Yrmv>{}5Ly4k;9Fi-vbT>LTS)C8p^pth(;Hk(;lMJ0_=wej?m{j2K2B_Ey6y)v|U z?$?N4VWCV|@I&3-ViUH81_mD@WtV+@drd{zb@=Nc?wvc|efl(Mc*QSX=835JMiig- zO%)XtGhO!utw%d#<@3nC=q$cdtu9a7h+DE=oWG{=!P3HlU+v|^K*hZ0^Vm)TxjRpu zK7F&GKsxr!)7#9PQYT}#h*?uL#GWaxlU_-M0F$A6zV#oF7qk5*%rL#r$ZOV(ClGSKK|g1-ZDhVb(vY*;rXw`#!n;7-0Cb zDl6IFw8i{sflA3)rVVtpwYBMFNt%WBqGmfz_B~7rEzXG}_m6Vr@l5W{bG(4vy7>~J 
zVPX0P1~jKmom#*5;`(pjzCD(>CXBk2+U)*VCm^AF$#g};u4qU7ZxV0UycVo zd$!GKvd;shzOcAh?tlx*jF`)uz0@TC)ug1P4{?f|8aZ4m)AjtS-@SXc&+3~*re*8K znU7Fycc_0Qy1@ayJtjE#+5Y#p7RZk&-ibbXp18{)n{&nZ0r^}fEoml~7U z7H4}oS9(S^%!M7YqeC%~TbP?tiaxpaaY#r}X(`?C@bKu^SW)O9JL}nTO4t|WT-%=2 zbW9v84<0;-Z|VP(f;QXY^ujL;kJkBD+tqlj9AW;wdpidQy*KW?sMlK?=6}Gz&~U6u zupknhC^aq31BC_!^AZj=grW8>Q>JkPES zePNYw+{eTr=AN0k-^bEa)33Q%z53}vvKVhfMeV>7Jj)g+(wbqyTwPslRK+T09T{!K zRww(jz|X<=;DH0Bk+L_|$KASfr>c@H^?}6LJF7FdupF{fdFtmP@axyF;H=)Ep}mP$ z(VaCU9rm5p(&G407r~5*-rU++TwTpTsR}BISBf&Co^7!i|1RGm%y<4pv_tqdx!dJG zW*2{t@Hnhnw=UJ}^B$CM+McT5JGez}Z*M&llRLtc%1E6&r_rgY60{K;Tr3?iWU?(at0Sa2v1b~txk%B*#>TRW+wT_8 zc^mIIly|9qyTr9|ZceA2qI!i?EsJ5&ZuzBisO+g(S%IGp?%#j((xv$5Z{1~nV{KR4 zi>j*VOT0GRxOI#6^y$+bU0t4NrtiwiR$zq{qveg^MZDEHgB7IzFLLwG$A2j1`9#x>aZ5-TV<^D^=SrFlj7mFTp{yD{v?eIy1f@O zKU{0UHxjfFB%r;grF5jdpe)U>%zCg{h(p5hocRY0YwLZad;-u^#`5gkxm{d*GiuxW zhdWNBqReXLUO5w&?}BYMGgijQQ^auZ-aYYOd>I2xDN(4>{)IlTUyE@_IkVnj<}59L zxFgNJU-@f+8)ulf!#gx6o(bI?TiOfBT1D7uHZ!C8%`GhzL1REn<~3h~A3wf}M%Fpr zQ>C6`%OfEvnT7&pL3#4VZAt3ltIJ^nGLe7|UD(J0c`0~@cXZtrrlJ@E!!=vj-UX(Q z;0OCY#@y#GAE}Qzp`LBQcEF<9{l*IB=cw8(mviwtcV9^6K#LI!Y6CdIn>psQS=4Ci zk-tB=H~A1$J>0use!j!N#{8KsdJR3c2U`cXpjDRnagtW4o?*;L#f3+rrl!_1T(A@G zv{-s+eh1xYt8Z`F3g*Y}&**GeF1vC0mkbld<;$1%eOkmru=!rT)~A7G-MTd!H{QmB zH9$kY)SA5)9emk|6DNEo2HSEs0mzr&y;2(96&J7Fy45%L>WBfriQ{MoHOg23MY{Oi zyALs^g86TlMMXskIZa-|D?KiBS6V=TiM;20m*4yBew?ljJ;Z`;vdCSl_-2nvEW_g5 zfS~nG5jr$T}esFUzn4VGk{-1*_rG$ z(olJMdDCa{d&zaw2VrQM`} zUIG;*cl>F4Jpb8EYk4V_^s83Mo;&v-NrIPGUTB$5N{W?^{g8A~dpqYcI)?I*%j;RO ze=acSNjT5!1H>JFcH+>cO`GCax!PjSu$Sn0e)^>9JlhleSkyeoAl1Ye7jk@{@#O|T z`7`+o)0*?styUK^9O;%XzgwgfLfTQ5*=J>j>1rQNxB0d0ZcBLvKlW(P#YBS=Y|gN! 
zE<~M6weCEUY1(jn<6uZV>aLJY*U_-x#F`wt-VZO%^SyunzE482Drm3WmcbmWb}3Zn zW7GL_?MsVy04p}e>%9mFSWgH;`uLvnFRV{CN=$-z?6>@)Dijs<<@c|VvCkKDdFd?s z4LS-v(uHH}xF3Z39I)H@z44`Hl;c@g!-Gfn#O9TSSdnbnqU(WSPgY4u-H}W7Y{bi|pC68n5EGjEwu) zkm&C2ZXt)CpX!8fINxE~axUNbn#Sz6yz3Ls4l1J&$e^_HP!3-H`hfA}%UF8`=B*Oy z^KR$Ptts{6jOd;?uJbV<>ostklc{dQSaYVSN{jXPU5eoWZ+-9GQ<8`kUg2Tv${4*O zz*WF&J+CJ|x2h8F!WXdXyPV%2{(q9i_^)&@clU+bORvqMX63G`s6Q_)So2630G;y} zUbbvmiGHGJOYT*xpkjO9Er&M(FNnWNNW@k${1mTrabU8)VFh|#)l@p+R|I<0*RN$_ zVgikJ{5`!_KsTSCKE5SYe_5&T7CwPjm)qWeX>hwwMrKwM@NRByF0XNG^#7mvg2M(yIwHFmppLw$1BO^luG|&c^h%EN1#dt}_QLeIukKcz{ zU!|s}9{~=05EOJ<^7?eS@KKkEnuAmyMOI{+eO?XNC1%}01>D9x)tIb(pP-=XC%t`r zcUN!NQ;1K;!vXA;Gcw}7+@6mWarxxWZz$nd#l-=!-c@Y*vEL&Fy(fOz_Na&lC;FKK zTx4Eu+l(t}#s>hT@Fe`|C|X9!;TCcG@@Zi-={t}UHUgX*wA^N7-{-dItTEbvf2K?@ zZ{B}$eyJtHgs0^QbzyTeD=zi+)2HkV3=C^fKRY_~makZ0&bYJgB^q28cJBt=6!kP3 z5#wriG%fzz-Ve{@GDM?uFV;Ulrj%`=0fJhHrE&cDamS@aNx)-)Rwi6{De4-fg~pHL zkmp!;8DF;U3sv4vDY~5K0FDcDqP5|Yd{vv{edZ+2C#efLUb|KVWQ*7uQx4yFK#_qAUwqDb#vf5wr+IaN$ zZ2@(X9V{mc1ZO7dq$xcnb>YXFlC?8JrMn$&uS1inej>_u!R4=(_pjLJVh^nTh}QWs zX?_dSPmSVjV)&&9%n;*#?tg!iU>6YQ#R2=FFQSSxQX_4-yDBncqJgJ#R?p1$1nUw- zsp4D^8+L+z^<$y*jtHWUx+=;Rx(~K>-c21Wrp|Lw{OR} zJ46zD4qX1qYTlTn^z|y5Z;@cZ{B~ZE{rlg)IG@PPX0M{GyzlZ?v5NFA6f)o+pOA53 zc2TqS8wGXv>C_kIC+gBWilG{8+rIsXu=lK@^a3~i44u1>VX5Bm*PQfDIYFJbcR|tz zKE-{VJjDWt6?*vUS!d~?Z+V(RCu<4|sn8IQKRsYc6}>_rCpY($=SMUg>$yqOmU%ps zVjyLuEVJ`I)Q|B!N6?^oC>|aj;wzkK`sISpKHevr?p-a{x2L7Rt$@F|0UuWtEEsI0 zSEZ;yv3#kXwjDQj7cId#dTW1O+!cYVwzlu?vFv(3=t_Fl4LU|6He~Ud7A4)Lw{{CvfA0kJs%Q8pe?2inLqi5mDXoobVr#xvJW8`@5q?R1 z31qPNQ-aDrca-j4SMg|9m9Y7f$B*^!5uh1?p}K^Wn>PuJZKKxqWyJMPV2>yhivOzi=q4Wn#ZwuJSF zo&bdRT_y!MbRU|Df^ec$b1F5Yrogv~G|QIpLH7rO)%^mkX3U9)EW zdTzQZ+14YvWtSOx z=KA(cW~i*{Xy5!)q9W4L(lfKOl$KY1ow%WyA5X1PZF>}aJSL+#u@C|odbD0!>pM?< zzVHvrm=a#$Zd`^k$W0v~Rt!>SuXr>dZ0!1GapCS^!~K4xP{T z>qwF}?i(bDMRy6X4my)UKg}iljdYyp%XUj*W9bj`uh6hMWnO@ zcovB0sz^AF%B<6hoo-N+=G7@<+r0TXR-5>*3{bKeykm;T$j^xY<S@b!FY;#!-} 
zACty~2Ausvvi4!<#d6B|SB5SMb%^ZScW$G*zKKcP<1?q)(>G_Qb1{}|;^6S+Rf+9D z_pE7Lldi#Nx1_s{jSWoslvT4eJIpqCj_T^_cN<7QOe{cU3A;1o>*q%r z&bb0tmzPtZ8au1ro5Cu%e*JpO^?imPj$n}ihHf*rLhToy668BH6nx-kwU^n>ojcK+ z9x!eS6>3&_dGg_QAYX#33hU~au{e*MKJ9&Wf#zdIuglCRHLzJ_`sK4CHkX@IWyilj zD|vd*D#%EFUX{>LC>dhcr<1z!XE_K_N5TJmahD;Tl1pFftCXaqOZCswg|+=;d0t-3 zI&yCv&*)%_NSX!ejC#Ivjw4ff=u7n&CAxwHgr!&q~RG?$r96dGh6NW~5x?ujy5g|hpa8sh%WA~h_tP4B*OM}*VqPzZQO<7XlD^xsx!SwU=$&-a3C-2AC?l|57)VY$GdDFo8 z6rBZWq0qzUM*TMYA!Z8yI&aUNKcwhNYZUep-vrw4`aP;AZC>|nm2@(??pPBdVq$#Q zPD`y=?Wg$qh?EpTD2y3dgM+V~ruIBJVEJO~j=ZHX-AiZwLlsof0WbH(S(=WIcFL95 zg13eJX!=z4HpnIi?FstkT8JWo>jbf%#jkd0KL z_dCf*s$h3maog9ga1TO;vIC7A!j&XShsE4Hm4y)V<{=A-REBfa7T?`$)uvKKYsG0E5dE@p=DgPM~lqdLJ4~I z4Rq=I6g*H;=z)rMBn*82z8oIe=?JMUU62&AmV#=nUAN8?ouT4Kr}eNh5KgsH>aGXUvR+SwyD!lnZP>qeVogtu zP2Y!Y8X?dUU#KQLe?|E&YG-GsnykgPxVY#gD;07__Os}txC3vD+U#s?mHC^la-F); zbF3hM=5s){-IW`SpIztr<2Y7ar+LFUF;}TKg`TT7J=o%8%9jG8qp+=Q3(?*Dwn`c` z1m0#=d(O)xWOwDloHSmwSI;$@%dGqK-1+ZE%sumWTbwxJvBl3MFd|*Yb*M3avQ@<9aDPb8i~! zdmVhaR(ciR&&|y>pO=_#R^79KtT5jD-rn!=vDD>tWe_L3;;Go!*x9?mX=b3poO^Xy z`z4!1J9r};MIznG*xP4rZR}b~(#hNG$;Blh!9mI-MeyvYQ^lj5#e_$e%uaMUR&#`g zz&r$CIOQn=UaN#g3|4cGA}cGq2FliGKXi3~rry`m5g%Qs-6iNdcKzuuq56MukdoBC z0|$=7!7}PWy)8lEH!6}xC7OS>_XadKUZyGt4pv%uP8+Zd*DR;@$DedlqX-W-RNd!?C~ z{UkJ*&u9eLvW(6ow7zjjOiBX9LIYh1LG58s5S{Fr2lz_t`M#g^HU3L_I!aSlZ-iUz zc{iz`<35`!D)EgJ*(rr0InYE=}fm>C&AtLvIR5eN|s zG-4W5sT$7mjlE)P03|1#A(rX`w5`0oa-2lEq&Lt3(8Z?QQ&l?)q{3%hPxmoHl8OBQ z1KhqcsIt~D5NCe>HUv4J8LtwgG(9LPDkAO9^}>%OJ8Nqh(7t~*lAevNt?+Tbt$UxI zXfKO;qI{qhx}pNjp{M&zx6f`m>E_r8R6rz4o{0k>SQnVrXlQ5@efe@xExgT;_JgzE zLv8duRp;g?cr9?yAL|6k{&D{1ing`=Ns)}p7q~8@LT!|9fi}6}hLN+2i?njexYga^ z!E;}ZH#Rn|T)FZAEjP5d>GIs|c9SJ}0rklLG7=!i`? z4CnI7&HFD!1~1;cb*nQmC0{x805rKg{vFc08MDIu{kLBm>hE9W=H@0J5*WAvD)@^A zVQkAuac5&=7+B;&`Ex(RV(jeheq>U$w>-di7B{*DPFqj_rPr}3j`T(BymYEMK%+v? 
zeK>nH!LI!Z{jaINm-#ANSE5!MQGmtefK*(?gO{~P2LBpp8p)`AE_vw#S4FS9g2HMd zIDUO?Bd46A7$LP!RJ0~tQcg~eWGDjnoL^V9j)g_(*%8``2cNDEf0cz6V_|7ImAcW^ zu++N>D*m@Pj3?yJ-_kX%*>eK*EbHQ(tJYWFJ|oll3UBp&c`baVTjb-#E8d_cMtGI z7MO=&$rvC(k#1{f*iCa)M?fu!6lVo9Jth+`s};@V+h&L zU|0D&U0Z0;NHaJ_gk(Ixyy$sQhUR>i!$c?w>i_MinyC3o0B$08#F-Pm97Q18B~3r8 z!y!|^z4CcnBWYLQ@s+VRSq*KEm=nrZ@S1&?@9%Eg1Tz{+)wU*=dpbQZ94ONta#L1*9vMmX=m}@KDyf<`*&W2iwd=Mg$3jh{6IZxWTGOTTR`B@*Blm>ou|*u z3oSy(MKZxD{1;!r?ky82kAZJpVdWa=)s+c@*OhRE!XeWKvEJN^IKT~s82@>f>JA`o z&OOt0jg4v>kNVH5(|la!?|I`!NfV0hJE+ls%+{1z)m7hzY+jH}UeZDAO!>JNBi%|hAf^5}EK~@D z2(A)1uLyrIV0i`ThEij=6pd8iw_d2%=iXjzLHJ{F;g@dMm-1w2Vo?BFFuF4>F2b)^ zeq4?WwBQnQ7cXumSIwT! zLIuS*@MTB)uaR~lW>bA!W}pj&2Q?S&$dNQxR0<;xTG~LTJom|MF`SZ4H&LSZ(1{sU zbI?XJyBvZ5RvmWug0d7$LE^;|9MJjh7IieU`4%ucFpIlvG}6aQ+rUkTi2^yoe;lUT z25M^ROCO_GL0(+DVZ$xZ`fgy^c(s?C!FJ0eMl@Z|F}H`;V~M1q^#$xXznVBukXauD z1OzBbo_^&#YkTzg@$I0L$_P1G16=;NzOe!g*Q@9O!c!|58B54_hlcVewnC9=vChKY z3jBQE%j*clv-*6O>)bp%`tX7M?X+C~S;?gkcu~DdAR6s&9GjdxibuO4j^vN9+~6>< zKnUsR?4-VW^(wK<%ga|1Q4oslS~fP`kU9vvAc`Lc-WbKMzZ#h<9ej3o@bLl5%?GT$@gGVdX%IZ|OJ83L zRR#6H}LG= z)zv8~L(Xs@=`|E5>IVnQqAub-(tuA(Oi3ryNROWR0vnxF`6Nxq-gwb_G^^A;B91ga zTFi{M7#N@!B1{W>O`Z<84+*=0LRI{Dm61_3eNE0K>3vz*4t{=yO#)iyB$%oCHB7D! 
zHg98>^!QV%$2tGa-i<4vcI~@1Zd9Vjo;A<3X3t%O_=Ie~pVUY_YhjUO;n;*KV~A2O zp4AFjN@MgSL`>w>xbw5KvTAG|Jb)iU>|R(vmA4i0?fd<^{Bq17(xc<#mR6roKz54A zmtjA%A(kSIrB>~IVNL%V9n|g8o2yuideEaJp`mR1-6@UDWw@`f>^nRWJZsX;(jx;$ zN3HR!5I+l&hQYx_KXdxjsYii81azP19PloPk4Z4GQT$w%bcL zER*_#a0D#6)?|Z#AH!p&v@W2C`)4yUGLjbsi~W0Y@u7gG-f=}3D>pLfoXskSLo@&s zu)bo%7#xkGj{Er2GHy~u<9I-^I(znP5JL)MpR2DaGBg_nwC@^{n+}9aPx`r zo>O5ds}Qld%X2EIusmLGk>3E(OYpvyWxQrVK|$hfw*llj;H-IRu!{(bh!j8ACYRpU z6v}$|3O_DoH$78?!J2QTi*3e z@cciEr96DN7Mk~N_8(GwZ?^JH4K$Sz2KxNied9!zKZ?Vg*vzG^|AZ`*r1q%BuK`d= zw0ZLavG_=WY5#Th2sJ;LIn*c4RzPY-NPfwRZ2-t8#-N)NI)vos}Nf!aBea<-p zJK$5|1;vQAPw|jrUfcJt0}YU_9ABM|CD~EN>Go-}MC0>g^cKyjojax#LekQNvM;yo zVPo2OlKC7gniT%sd65Y^dE$`a&pki7s-)j-Y2j64wK2ADbG9Wr49_PQw5vasi3Piv zks1Mh!h1F&JNq~at!VAjw#YMq3b&9@>u$9wsA+VbHJuslgl_n{QQT#2vIwQj;j$gX zy&1%Al-@dyeZ3;sYGGkPu>a&x>xH-lM0Rf@4k~Y@k$;DQ%@ds5t0mo`9+tIMzVji) z?BKY#U2rr#Am5?l9T^!KXn>Y3I-CLMPz-j~{kEm%{Us|aKM1kT0Spl`bG^mLp&Iod z$Q#Eb}mYG`BB2|q2y#Mwh42v=dt zry`R{Z1mR%zjbwWk+=^$LPISJ52#9F51_(5jv9LcR|7j@Dcr*9gKN4R7q9yA z;s&TH>5-X=fk?mst`k#{NN#_zVpgr8k66#dD*dI;freL1Cl;jR)cj`Sp8feb|Z zbKo@OhtLm?fjUZ=8SjH~7Yff9UgU8-JuWojJIGXF0~Wr2zg$2-0Ck!hiSc^qj(&|P z^ZNiiiM{~7`VIjrL`j$+xDpuzE5iec36F}Mm-O{LaJPl6t?X!6ELAYeK0v4<+fvtg z?AW5i=n}F{H;`Y2atImHEE1sp(&ra}jgi|B6~_``quHu((X|530Tto~&r`wS^_bZ*N0=&@rYO^DFs=;){n3NQf~dC7jbM;aQMeaJQ< zuEVu^_e#)#LZE9nAS+>elII3ZUk(CAMi}otKG@PSNQ|UG;O0Qs6Xj!FQyY*i$D7uM z1{PhHsSReI;vaTX5>gyUg3!cd1?)K@6ob{AfD<_W-6Q0!G8Ts8_3PavCSa;T=?X)D zn;;N?w=R^Sl}t>ffOrIXA+G{eotSv>@C3>CKp!w?-NVOtM4^I+UkiIJ`oNFZ_bjRSF zDItPSyhQw`nz4NYSmxjeWSPHwxtH*5Xc**yAVG>qX6KsWKXg8VDsWVHDfWNQZFEOD z{rRr{_n-fN8YDTH5~~=&6sW+n2vL1^LGIC`OZu*^uKsrZkgK3?7)?R*rc7avN;|pT zTdcxgC}qzI3fx?;p4x>*9Wa@DIz;&7$&&~${mBrntv9*-p8=fcgFroG+JTz_*HAU- zDN-HaO$Jz&B*zD_gNmO1KF10zUfO%{|DCn?AXb1nde(*O3DLao%8$)RgZ@c9}c?o^!5f1Wy9*f!zN-(_?)$kjmN!vtF*MV&gEXQ zB+4=ReT~|6aE%#606sw73)rnhV{B}k`s&pQfb@O#edjn3!D)5j9-o>zR^iW6(uBNB zDJ=Q`KgmB(VEgmBhyR&LxO-QM6XK~XQcp8qunUntIG2PfaIu8{`ZYqGL9oSPKZ`bM 
zpcS6zuj(xa*8$lJ4!Cyh8nNPL8@7F{EGao=WyuKKMp|)cKknwPw>JS%fR$9B6%fs$ z(CnQCmTh2w_|kBFXDPy4r$fco!f}yPQ1CLksIJaR_yM$35@Q8mdj~&9-za*44<&1n zoiJXM7VaM8Pk?_5kzyK!0Qnq{5Hia?-5m+YXK=t~n??TK z`CRC}|BOgYl6(aaOK`LWLadty=sh1aX|Kz3np%ylLEq=Z;I2uu3XS%AyUp>qTV5<7 zT$6dJo(74oJ=U@j8}_Ic!er#3pcBu2daCG`U)N7!q>u$2#1F}8nt%bUf?~V44)b`)zjI(tB)VE*zGB4+XN+8k*I8jM2{?-kJG5&rqvF>{ zKWbRm3(5>B=E<%qo{z7pW$!6~qB@vWefY2j^1*vZNOrAzsbmm@JC|X6MqNXr6U~S4 zdHyx~Z~+Jj76R(%>FGrV_xzN@gE|M+F+W^t5`|}s;hJKI;*DoU%m=e%#L$iahU?$pcn5o)zFA8j-ev>N8-qX0blZcGxu7*p~MK1F*9p6EGb zn#S@-vWY-1r~)mQ2k?+djycI0bkv_AOo8q2!uqjSQ%1y#A=U_aslCgnCLEY22_zJC754hdt!!h&1Q6(OQE|C!X=?X zQJzJl^#9I@8!TwPx~xC`y@IfHWDse9cbc1FE~NOP#})ya0m@rnz50)`gXhmB zB&1}%^m(CThJ^fAdb)$*L^%vJ5+eU6sJ|=Zps?^d+*#2kU1txZ_6vbn(SKouyXWQ} zh6Q~Wi`j5?yyx}xU&*ztnOt)y9{XJ99rv4kTDfuK#vHRmh)M)K;N5Syqn)hzD!I#M z1r;D()#r3-F1YpJy~m-KboTYFgf~VUVvx|fX;(}p2IrBv!;JHSh^5HgX7E6g4#N!a zOue8pR5VTjOOl)h!KY*(0D=n8E`%4ApaltspX-nNa@<3bBob;L0SrLEWW@sL!{26_ zA%Rz-J;T}8Y~?x#SHXvf70}p@q#2ahehw-qC>V4UJpW=nGWFcNPsMDuL;B+3g*u4= zn}X;;;)}cBk*7M28IVXy-PSp{8&z`|&a>kjNw-vyBFbM{ypE;245k5QJY=Db5hyIg z^`*5{bNXB5TLJeUc46ycCah1MyxW)`j&v`c)-hZ{$KW7;xF4Td(p_9Dp@$cHiTmQX z=!eJ>N&#wwBocNY1J7VVcD;Rb`9^is(?fPUF*|e{`d`u4uN+{-@Ke{nxjN#D7mE`h*Ysf1^{f6x3j-&_ygl1-5ARy0ibC)nguv0PI8*Gq-@iw;-Q40|SZVJ?b3ftL# z@sjx3KT^rvyS51^s5De#bss*QLs+He%3-|Dpv+}5qvPWrO?8m?XnBj4vJnBu_f6m@ z@9>}BU;Fs+<8m;F3W@yu{PZgbE`7oUE+76Wh}%r!?}QaXf>p zoBl?@4SvT)=ws1Y5r7|$hUCt;fgQ8-@(cfj`hlkX_I z!7$x{7IQ@Hvbi~5Em`YD_4P`<4%YVepWaAL$@f2A$-uCFet!NY^omvOuD@TVVnm=5 zee8&tnVD~JRA#thxI}%G_}lBh&ZaFOmDTg24eH2Bq&pj5M_{MlLd=>TNGRaU1hQsX zaRaj_RPwOHRsuqJxd;gft!+%v6b)+yXoP`fZMxxAg;jff(~W(F1u?*ET{aGxY?2D#tZPa?Ocu0jt0fFm|y2VW)(>;){xhZ(rN8lFoP#|ZP zKs({4fZKuCA3=^A0{K1slfvx;gwZ7lh=V9mSl+jk&*TK;_T`lo8F$ktfMf81WQ`9W z-hs^!ET~(G-ofJl$Ciw%fNFOFOZuAv4IYEeB5JXL3z1}0w+jr;=E&Lr6uM*Ukc5Ii zm$>YjeHP98$jmu7(}(8#i;x4TNcVD<}@^wH?CR3rI~!^&I+@pWJZivV%(zxV$*Av-v^)tPv$sreg@N8gS;l6pmOwG zg9Y&Is!Hs3k~0&EpTVllF=M78+v&zkx-cMFiTgN$Ohpi8@cYG(=qIAcrcJ>^(^K0} 
z*`UtcN{S$Xp)Z-d(D>J4Fb8kp)N~EBgHd!ubH*2SCtaiju!jgSq{5|Si+#O*g@OIR z>FLx}R76@rXFTHUECK0p6l2sj7mpmFfqLMH!UWyrevd*~P+}q#33GPZH%`S!a3uS6##j`;|{7G7#N@jf*hN|gEwla zfY5dmmf}fIPPktm-Z+@c|ELBLe%IcvyUt=9QO$cEW^5$wijWonNKW)TVMlMTWz0BX z5l~UV^VcYh1C%1Bf9kQ13mBJ!?vxOHI!0L^^^N2?)Hgy11P0;a=ReJVN?yL`YP-uF zfN*mQ=_v33q7Y-aVqMiIYMY-_(cUDUi_B$JWpQz_4fb)%n~nQPA`-|7DG<;3;ni>) zJ-~#^D^~!H?%EZ{kwxM)V9u45p#I+2c+UZu4DwL?^a1S-T|9dQJ7Og&%bE;!#mD=o z3l`@RlXRfe*uZBFK!S^E6s~k6oML}*{Bi70G@vBCdgL>Tr#`BEx?T>lD_RAvTR?(6 z6B8448ix!kS(Cya#>7yvD@EuY@nwZPownb=!I9(`j?3Q!;=eb%ojg?tz$H|reuOor zuUJ(HdPC;t(60+Q;*2F1wEd{-gn|%ZGq) zz-sYDzRZh)K+OzFI=UlM+8}BIVgCC}YB|#QC(_PC7Ql4x@$uQjE5C-d5fWH|aLvWv zeSl?rDsOcDGJ@@u0!;m9|5nKyf1x#sHTCsEoKDYMpgVJ?xcweFNanZvEqt(BkAb*O zkH%?2tS8wevbKFjT8+%@Kcqn5&0oi$sT9cGHdm-j^&? zB^`$?dw`oRwpO3H{sxnae*Ao?usTWms<0g#92|wAVU+9QqBLirfl*cxI|nYhjj~(y z(|FOMJ{pPU$6_|m`A=%NNHu=OJ~wK0fcEaMr%1IAbQ8NeW6f(%3w{@#o3*)zPb7kH}}y45)KGsC}BuVz6EfEGDL=$0i33rP$e|l5N5kCD~vgBOw8R%I;(Se zMHYlrtvB}aYRSBgi0Kcfr+@!$q=li=Dhi0s3iOT5aldC~%02x6Y3?9<{TzBcnF~XN z(i1YyZYGhiCW1#GDA9m9$Uq1_2Zt2n#DKeNiO&wLj6}m*oH4ruOQ-e3NXi$id?kcg zFqd?y64|;mOay9BUA}@S+<7%)OG`#Tc#WKpkP!Zr{DQAvzb51A{=#SEZoI;P;rJI4 zplCJyk#r}diAcxRSFRk#{OK5~es}IjKEXCd9?v5-pJ6*7Vj@%fLc;K{g5{4`Np0S| z3wdwf>S2O`F9ljTUR}@-<-3}h*$V^Xdbp~5uGyqLs2#NXVR{A!aD)yl(o{y=BGaq} zZjurbMh=|PuKa92B`4AxFj_&*5FlzcXaNbikila7wIRn(p-FajjP1zZEgHf3@08G5 zrDOyEDl5|uOPK3qq^3Tg)i!4MnE*!xrrCtWEOAXvo1$#WB7%${LzsvOpUUcWHI`RW z;a}TaGK{HCqQc24FEHjEyY ztZMu@YMAw8^gDy$o9Y{;kM?K(gjTk1uVnf^UIErqIY~H9JlplfhvU#Xa1F19aq+|$ zM2Z-fHUk8r%`JPt+O*C@^cAO`-$WfYVe zsD*al^#7Kc=Sy|F3l7WtyI!*~&0ASH`OxK5J6Q6)e}V)#0)TB@4?5=>3*6i=u_8}% z6oD;rq(TSeIsZ4Xxv~rjA0c}Hz{*X5wpgpSCd_HKevFJZm&@+q?Z>?OzUrKXdH_^< zZX-VnPMraps{C2twxlC85H3(8@`jEs3p0>7{o^>e3$W4+jQw__wh|!+#B~&QeA;Ck zJwo!~`mle+vlfJ$XGUe!f}ZL(*co8L6vug_^J@-)Rro+=))cx=IlG(DZ4opE=W4tz z5HZ=j5CE>Khm`4KSmb0#jEn$6PIR1~5rQ#r6H17D_7WMe#G(fr+r@c3OcHN1UMKI3 zMe{Jeo3HW09)#p?a~Bo>W{4rw11WL;{%WNw3NgQaqQZ(cHcjQ3>2cL{M+VpkPxRhe 
zcc#}63p6i6ZgOq>s_2N{B5s%GW@dKN<(vD@+BG2J4$+<!;6foUODcU|O8ep#mwRs2q495Wr zR_Of`&Mkm3ajV^#t}xH$I`-xoA}hWe`c2YbywuPdGj2j z1*A;RGR)-QJbi| zFf7SaQ{T9Dtq1TYnc3w(Z1=>23)P$lX$BR5fjA*rZs+_J?759VD})hXq0a9=2D{gy4X@(664gM(8sCT>{DFpmP{ z?}W2?W}|7OsEJ{V(L9jXDo-a`*+MXl4ZU^qb8!)_{sGwN%$Nhfaa4~2;M81b)PY%S zyPxAK$X6PuzkCap-&z)y9rUqpuM9mIbWXY_ZY{kdMz7x8-JQQx7_lx1PRuTFpexrW zNat)>CzXp*PEHt;4*_@hCsaGnCQTnGcIXfe;y7h1H4FP_pHcZ5`8+5$ml$SHsOCQT z%KhDmdmf8LAO6cGdb}_V%73>Lrm%m6F zARNPwSHhP+=&rj5HNGO-65=V5Z{cR8@q^lfP&VbX;n!7F;sSXqZgOA{623zJr|^#^ zRX`p#I4#6yf{e`}1t5bDCkMtg*iIhnpD@H`oPoN=-6|S|6~e{EMTLY&0PPZaGmj5n z6~I{&i8dh^aSsmUbzzF9Fh$Z*VSdDC>OX?kf1;L01I)J^z7nwfV03J37jEvN2#XY6 z6gE*1S;C5G-*6%hHzhngoSc2-I|p=F!ch~)OlAPFH3+%&aoA7!wfy59D@@Jib5Cdk zV31>tklQBvlprYli%x;758m1-PgP-)UGV5P!>)y-A;7x?Xtj3D8h7x)!j={`9Fg(b zbwQkzXy|Jd!YhOLE}U&_!fi$(o`VG3dXa8X9NoZhmP5)}j0_sHeG`M|PE>9JPZ8%P z$7g}sJ)ku}!_@~Z+5Af73OKet9-?}d*#;=?V1u+on1_9Uyk{SrdI9?FU72 zgNO3OK;#ev?y%JmG+c{{0xG$?wgCbZ&JQ?e^93#r<+qkfUUkvi5RpDc0PP^rO;mfp zHz5gE`nGx7I&rYGvUWVR$}fWb$U|sN#}Jtn zMB`bBiUR=~E+-8b!)wQ}=LWG&ep5Pl0V({4e;@+6VSvI=O-o|dW z2%b7Qjp-jr;9K5(Y`&^55D|BxDN}lo8YFoa?xHb})+4CfWUd_=FcS**qb&nuJ`Vy` zR#J{J8M%YzW#lmkp}Yt-9LyyjGFp$Gk$onU#NlANZR5>#o)t!or^f0cC>_G*r$ZAz zkjG9ls#qt#ZwyW>2};1*j)dd_GwujZT_SXybUP-E#WbQ$k+d|(Vu>&WE8}Wb5_=?3 zAh_8Na0n6+qR0nA5Mfy@&e|b4K-JW z(tg3oT4Ndhf93t%x&O67a~d%qLli#biVa#&3A5;&^KlOlDXP+kfL1Gimb`!eU93Yy znvIREsHtfaB#-lXqX^LLEkBK*1bQIWYce){im!&r9Wj7!=uoN!3Q+>XiJlA6f^y_4Agl{6#J zIQOoaS?-QbMI`5cOz?TE`*c0=EclChu=czf$ha9!Vlk);;PoHJ`>w3jHiOqagHtH7 zk`>h;P246!Wu8eOINq2Szn@}hX-P(48`4_H8wi)w+{pdpnl@3M@UD$p!b3vXNLri( ztZ>}ngVWHT!n=xDXNRE4yhqD0l8Y+h|+T=p@7pKr(v9?LE`%KY2_aS z15dl9{x-X{l=uIR!ra~61jkH(T}mA7b>P9brSs6bK8ZQog0UbW8s)HveMkI$FS##f z-NVDlkF}Hjn?20&KSqNlksP3qbw^>Sy~;xm+3EbP zDkhtTno=UZ%|+frgbfmVx~L+dM>uzGy?~aOmH2rX91TED%{Yb_LuMn1R^m96-)pZ2 zT}}V3^AZtb@*DtozAO2S2cefCBjAP86Cg~SGuK8~fQ(Rrj9o(``1Pd;5 zembX&6C>91sqt%0^pSYl*-$YLH~}WJi{|;RV$msHpHnN!4x1B$DT+ z+m9ZR8EzZQZzXBIVglNdML>j{;|cu$af4?-k0f7)5IeBr4tnc~^5WuUkVi^L6pxja 
zoR7JUn|nE4KbWfrOhHhMRj5$p2%$6|5*~#_jPt^_0c77I8~wJyg)R&MgAC1KCA@NjG_Wl=$@Zjv9tspVCv0u6Xd`Q9$=Zg46M|1yG zDXi~O`_VONrKJnzhhONxn`1}R z^F7uTsUZ?Ls1%>QE@R>%X2?E&-hh5ciu{KhJChO_u^5XugeWT}7JF}ErJ=KW0|e>= zt%}+AtpS|z!t5MQ0eFWWMEJ)YQ^x31kt$EIgO1`f#g35?KNNCu-~vVpZlJK?WIJz^ zb*Qgfq4?LgI;keAF+z|l!5$;WVvy^>c0%xzct1eq%qLEiS!A`4d@K$yLo!)}gcspw z6jxTRhIw`rn+N#zfyIj7Z*p>o6GCEkB#=(t9GDi?oD4vI(#w~fKYy+!C;dR>#l@C4 z=~wREyO$h{WB4W0lpHVyVcr@#3z;Y+r`w@-K>a1NZipN+6MrNwE^hGIeb}H;7#$OX z`Hn$JPn5SV{1e6oj^p=Wpo{D-2)DEd^L?Jvz+lErXjt-bu6%vXfQ>My1OQBGTj2R+AnKI9c@XC}SGH0GL&+|Sm@B7*N{p_{( zTEF%CZ@0eR_372)d7k^euj{^T>ZgkQecC}mvxIt;f$Y|#C7V|zMxVO ztm3nfJynw)0qoG>&m#Z1XQx_{O8)tKu&nUh=w`6E3wUtDX_wEN>MqRBwJc!wzTC9` zQk#n2vp1@1uS6j>9p1TTV|zU@vZxlug0c1Xma{T;`sa2C9s3vaw>L<#0q5*U8tr+x z9q>TJE%a$>_z$qNH)xl#{r}nbT4Mhy+gD9~|MQjqfmO->7JD`S=NA3*nT!AP|Dw6} zzrX+glkVXEhYPs270DVZN&Fw(rcJI*4S|y9|E?{G!9vY(ELqrz1BJ32wR%0TSyw$J zZrRTVz{bkhj=Wvh`!XX`yps% zq#l4V4}j$tqcFZcXnu0WLG=D!fh$-6 z2QUoC%buWgY9@6EfM|%B-J5RDwRM|iUY{NzX6gMTK3EQOH#&9IA!GdA+35g&xm$Vx zr({=kG!qCaS_qi{F9#vb#<_hAkWBc*yy}}FzBuTxeLi0n8%7*l{S4^87QJkU^OV!acj7u*6u@djUz&aM}#nj+QJ%vc*u7$Oea0ihs#8KNZor8jmzU(}}_6vk(?88bX8hU63u=oA%M zvlqA69z}*zbYx^CaYF@-M|?jCVh!lT5fX#%iG_f9%aDVLc|&_mJQM(S5|9jJ!x!)o zL?k;G>AMx>#9~W`g!R<$emK+)UYmB6BF$UG_Se^UIPFjknkOD3n|MCJPaWmvwgoLm zPE6vC1onxf8esp5nT+(sDookYvVr?Cw=vhc| zVWZ6(X$a%tCj*%?>us4SimU`}ZD(XAt*8Ql60AhtWsmT`^5R9)85vj+OgtF>BS$0; zDt>?|`6XvwDIkGqF&qw6L%u{9d@$qRxc_FI3I3A|a}O{l_PE|*k)B(zv9aYQS!1wo zqHJLD0D1Dg3R*N@P^5MPcZ!46$9VUC66sdMUvS3$5C4R2pbTm{>I*=cd-?G=^^n}sfDz)NZrt|R4E5IF#DoJ%e&WuG zjN=ZZ8{34fEOc)?dA4R1fc`S;a*zf%F_^?rV8M^@CNUF)#%0seuf(MZkT~gMge~J5 zXf{5hzAs1zH-(Jx0z^MV85M}DBGNhF&^XG7@)0fKv?$9&AgKLK+TnYf+uELj;{d?1 z4H%9H5EuI1NH9Q<4FgU*6eIiq|8*cM?bSX)?Bi5M9|h@5y9>gRbX?*ODV&MxJr2o9 z2pHiqAFy1!Gu&Cp;+`P~sizueE|6WvREo4|C~IME!|7?yS&*IQp*BKnB-#m7W;=m- ze8VCEp*ANlSFUL3k^oE77c>FLnTVr1d@_hY0_CUrTtPV2qLpiP0SqS3!<`8lReOkn zT0ol-co-(6s|lNMgxD|uaw0kSRDGRf6H*ys=$Wh8!=!-Cy9%r7g^u%%6KhZteRt|7 
zxy$SIUbF2LSLTCytqP-`GVmMyA-OMZus;1@xEe@6q2nupS(Qx3|7?ep#U5K|4=^#X z^iRUWHKRP;nb1o}0bGD}Ywj1B0~}?<)qKcH^wOnlWONjTPF2Ej{rsGeK))c2HWkw5 zL{NN^$MN&03G^P@Xt`@wx!<#{W0)l20Q})Sk}_ow)r6>PvYgoc;fnVXida7S3tMw| zCm~}eus^YW#OBIIQ_N|zi{7}C`_TOYl+&Jh6@WTj|0v?hoNCnP2r)rq4h(=Yc2Hio zua`mzWrJ<9RJ8PHJe)3))6#OjFwb4jM$0V@gwUWOe!x@##QwnY`0q%m!m?GY9G0%x-6kuy6?y zgb-#6iHX=+(tB8l0 zsGUeVum-$CaQ3pUUwH!^j>KvYA0l!BYG1VHENr?T3SvBbi}T%=n*d|Z1L&2SC|36=8>@=s7Hfv zEJKw^6vE`VL|HB7|Dz}Z5fizJa!fBE0swmJAVw#hPk6JNaH@rfHMX9^8@oq*Hf#ET z)LZATx!wf9CrV`~dJZ2xd`DH)4trYp?}h`v76HP$qnXnl?TsMuiNK$f+@R_}OWLj5 zc~YakABP}=%Q}j9QIYTra3u#)w{dB0L_mVg1BoEc=Ope!rrlE=?E-ZH;pXJ4ZwJ%~ zCRO%EGESd40tLdOOw!X1#U!cjpd}{=E)lo#8VJ0~Y=VyN1xkT0Fek%PB)iwu%`IV^ zAOQe|&ml(R+b@T~7{JIoe|~*-?BvBE79@G5L#qf;Kp95sq#*|c2%0$`u7%mIk<0R6 zrT5_jI;c-nC-k=uYzEl6oPxi#?l_U$b5cH4 za~?uBa4y6oPQb{%R8?(-U;<=9gm) z4we}S6N3_>uRCN2IMf##20~3Kz@FHEpy)y zdPrM1v6?4JZ&b%v&BSI)yCGGkH^V(M$y^sX@|lq9Y--BiEf)Gw{3Dmj!8Kyx5`7<@ zU+y%ltqD(Y_0x?bWPjmbu@7@`M#hit0cMYrR0Kh~xusNNJ!S?r(0#23t~vU!>; z_Klwt%j=7ZpNkFh&q4x@aO!gvwyHEX50DGVHP9?wv5I<%)lCbOy&|<9zzq?CEAOUk z;l+Ye_ggi1mTXYdsw2=ltOl{j&>Jqz{_P3I$GOm%!+s(X#1E6 zQMPEK`u+PiGo0|dN3Fgm7XeIKLoI_7DH>}fV*QaN6hBH+-wFwOi8aVs?b2wJ2?7P? 
z_2@dre%+|Vz;^vSau!GJOy?P?jA(8~b?PbdsMWYl@|_Jpj!pjH0TS`It*58w%?adD zXF(CD&Q#$Y4)5eQiU^)dukXJUtIr`-jpNxXSfJHZ2RlJo2?ITr0jYc_k4FeX2T-2SFP^h-^S^*(YGd@yV@;*S0m4cV5Y0XKOqxY=t?&SEQq1V7k$jk%%6Vzx zKF&MA++Lld8uM-*y|smi<7-E`7&gdl+PsrXSGY1$vyOx#s@P z)!$&me9b*&$`r7)c-^zG zbNhC&etihio?<7%&wce-uo}5K0Yki>V`At?mtyiPX+r?SL;pV0=Mm`vznd!d=wpJfk;YhK}X!V zdw0)Gd3iwj(sWQ#A9`wU|Gh&Fy8X6P@orB3tQ45?Ncj!`1>!3WCBtooQu#RB?qzik z&k#cODdO-N;8$c=%rb<8Kx`t{X(*+)zzyjLFF!vmSQA?;ovUc>4aBl04Ok@OI78Q# zCB-(U+V6C-XU3*zu0S-Sf#xr(=*n@0_dbc{uFfMc0$+_fDU_JD5&tYAFa@1%hiB50 zF64dT;zh@-mA8{0oKX?HJ<*EDWQPE}3fQDA_5f*KL9TU<^aNE|fhk>sr@x8M=+8l< zf6`t@km6VJ=%zw0Vo0-N*Ayab4-TJ*?y z>EB0lST|ndI4>gd9@ka3ghrjXR_=?o=ch2M>1m%h17kgKu4@5TpfY7++3+I(!!* zX~#Yz#3~3!a;^b)a6pXvt78s9|65?I$cr6z=!GR1D@5`R7=r_pgIE)GXza#%bV3Q+ zij)}FA%YoR#LfjZ;AqcJS~TO4Nh;WMClWuyM_?8lF|mXlg3l9FIWz#6a{%yB6d@S<+rXCgPBvfW!JLcC*~g zn|Dw-0p@;{91Tf1F*e7s3kQZ3=u{+qUJ2uYNS)wgx*W{96S?U8{8q@$mQXnG$=XlG zEQ>#VeM2aFZv*IkrWfb16fIU zv_+$2>|a1U+`2ab%FaOy0O~`oU%NJbHR7uYa17uh2f_m$-i$0dmz05Z0>R~$-t|z= z;$ahf8%Go=F}P@RkXR5^3A#eQ78Hm*Tt;KrTbpnQi73&c$aR=Q@{LwIv@zjkpVi_A zRev6=0}DcVap?(y-NlyG#B8=Ed%#Jo+!{1UI_7V)5tx6e1$N& zm`kJavZ@p{5Bfi{QT&r9)77PcizDMf2rL5$GIS9vinLng@P9&?)QWQ+cjN*-4^Q_g zv|+@-06VV4sHrlij?9~Y=JK+~zU%k4qLUNJXDjaaKL!>=F^rgp_x}x@mmR<}4^AbH z?c?G{SeumYq1TeT9)Jb;T=?P;o(Fto5U?%E23@yHkZ;co~$h-zUEUIX%<7!+Z@kt~92 zG^DMuM(}ni3%ShT3r`sUTKOoKU@HfJdlhm~q6LBd0%2;s`WDq5$J4OZ2nGVwzXf$6 zME~|^4Pe9^Ib@ThjNKt}79tj5GQAI64l@9}j*gC&mi~eCAwz)xp!slnl(#a&GRe3Ry!_Z9{B-P9(~L`EUr%7cuRJ=5tvTtXvN*62!)#A zTVMD|w6Ma}dNVeu2pYMOkHMeHnWW!fV9A6#Ugz|`8`;0<1TtP@ZVf;zP@IX*Tv&Nr zZ>Ii|xH##1;lgqx6cI=Y+#_C;fzzYiPjG=9VGUTvq$dWwg7B-500qVeJ9ra0%qTJ{ zBKBt!+!snWi-g)XY!jT>GD%+~H0jN{AP^<9Rcff&MP?A--*&1%!kY!5Ps} zs}SL@`5HrL-x3@5w)xm0h<7$qtB{{$Xv^pQd56_{!#4|IX$f@~{Y~(}C0%LWcOdN8 zLmVF=nsFzMLde<0ThnncDUbUjH$7A(CLytPCJAy-?J*sw>6cJ}0JUQ^w>Qz^FrtUy zvm?CN10YhlUmL8boJT*6f-hK$B|!eq7f|>@`k;cw}+qeMsgy@$?lc1 z7eCf5E)B-`W7V6ad{+u*-wMlHnkap&fNZT*7D$Bv9E8 
zys)ZP4OX$+J!=4q3^YCQ=tclbZ-^_>ekbpRMF*pk(l!|?O#d~w`ZRp6{IsWuex#}4psJ)GF)SaG$563k zaom8b#tJY8B)@n@Qtn<5ERb_(Vx% zt#I`_yD%`!yMQ^f@kDc{P%;zeJ6cpsTWv$8Z+9$HUtgacG_{U1T&GUSku#jAHD44W%xy(`)j`I$SeS0k22(A$ zw2KWl1N((K*9O=!uYxp+fm#L5r6QyCyfiZxamFgAvrIt>KyxZ1fRis`&C z?p{L>MVp&e;Alw_MG>7fTM55mkMz)LVM~;byQ#hbKxCJ`R9DMBl*g${EwU83EqJ4m;0iBE%@$%ZBiI zSN8&*rp3u6p!S%q#| z8VWhW9^}9jIUI1{4>4YNK5YetK{(&?FHy@jQL$ zp$Ha>^;Y|kc|*WrGKR&i0L(cJ1sLlbT4L^v&~x0nnJ)V?320RgS9%C5x?((z=5P)W@vujA>@Q3KA;;i z=cl#gOo^ig3LEM)O69V!Yy0GO15z4Ndl|^HNTLU=P88_HT6&S(NSN?OGDGLh=;yXwR*A6ZCutWVxz)`^n_x@#P zPD9>96r<=7qOpupBYCKBmu(PpShvB?RO2$YHF_wNN4LVKe8*Q-5bgH{9d)aRiao~bj4^#S^d3a1jN$mtY-NMj=H{Q{k081ANjj;zB=~TBss|$qx zD`ZCqdMz)!V1)-Xb}bd_ROySf;{)&cchf8L0mCQ*HzQV&&;W<%E0D1=$OA}fOuh%i zgF76g3u*C;Rt{RznBfth$9OLk5||(pdbkrz#Bm_{r{f!VKRSf3U$>6FQ)UC}O+=2N ze9t0ceuggFUC~YG1|*3!$#MWQ97X2Xv>785sCRRMuOCj16;<+lsI{W2`i6UibR!S( zIR?f(Fg03*^lq~?FAmHcVCdK2Scs7zXSk!y-eNb(_MRl3S5)^&Stn$n(EkB}Tn?u# z=+U0l?Qebcx;9Z9mq1?#cxMO=6x5H&rT14-H26Sv!2LdHxq2a0JAU7em3%T`k z8Y=+GTLzSd<%#kOs325uP_`}mve580eBfvNHKjv9%>qeA8%_U{<|+0z#wx6QWyKV= z{ue4m?>tqFw;t};H^inDP~&-24={^hPmz?}z!ch-d9K171dHhGV>Z@-=YVVpMFsWk zUYvDyC=}i!(V@zdIzhM*$rK!!l(M|TtT1+?mjcCx{3qIrDu0${~@Xr6}dX5u_- zf!3)suAK)`%iGhK2UMXWkLZ3c$s__8cJs0FRF`P%wnlzXW`ZyzE zg38W*snq|&#JL$#eZcNB|;@rEOk>%#*Mt>|*Z7UN~w3HI5!Pe}(9Tob>Z1*JkGnB#Rf=bWE z24|^`!-D>XrSk)vv@h?&B?x7?NRajm_q4kseNcgPgD`JBRgq43Okc6yi@t}yn3m@v z6V|mZ{9fqaK&ns_6;x`IgF30G5NKJ1x1ha|)atv~e&axZ7zHHSTmXbb>k5RpB>f*?~kq{!2IY|1H>lbDNP?~^Xq zJHR9`3^;>#uiTB{LsL#d<8OTu9ZE2xZ9PaG^l8#V|B?qM%rYp)VTXH3udv!K(YzH4 zM1`pNMWQNJif<^PNx7^RwEjSm*Qpbsv1Z8wB%2iafGM~@0H{a)ZSDH*53QjnI9b?~ z_B`x2(Q*Y@Jc42HP`)d1m&cKU3;BjhH1~=x+J#rXYkK>(=%VvB8Wh+lrSBlN$@kjZ z*#)AGB*QrfGl~QQL`0ih({V1iYdg#C)3l+T7H)(+$7@&S@peT5hoPatAi2 z47`AhlLz-tn&!|GLtMpM%v%62EPs_W@ca*qU7x`82`zmcQpEel4w%gT z(0uFg^r@&v8T#Td-((jyIMJpOw*nOW1X%-agHC3;-urbYeRLi5xo86|EtxaUzLumRS2pE2=MbLYCWwfoqy55cs(o6FjY`$`p z8z>&fieqY@QUI7r)Cs6`Fj`t#2lk0vYIN(C%$v(VbEDsQ5~hvM8+wtHkgAX!_9P;B 
zz!#X50GmD=px$23D*!dvjNjXO>NVF?Ax4I9*d!zJ~RW3|~ctRD>CxV9c0e$Rw&XqfkKw8lI0Yy~vE zjEcv+N5Glq2q7YDb+3reOBT4sw@_1HEMcD&u_=+tI}$C@SAS?qibm!&gVy)of*}hy zjF%`cE_3^8yk{KdU_Iv%K5;jXI=5d=To#RzTY)qaCYLBJkcI9mEE(Zy+?+ztkD-O0 zz-F6zoe^B{^PCwSjA{dK>n^y_tFAAoUrAy{^ms^TbH%?-zTtt!d zI<*PWh;9h3IB`zRiZq$vm|S*x%4+RjH7BO?C+BS^=%!n{x(?2F74#7h@=X>L0M@GR z+vA|WBJ3>^M-&trh%^D@=mjutD3q5W&-J^wrjh}f1lW3ITrA1t03Z;{vwEPL@5jz4 zfdi%K66&!L8hU)Tm|N|v%)q|A)5p1B)OXNDVpP{NL1Z(>*E? z{p8whJ?62_6DkN8DXC2beG?3+#O9UyjgP9yRA*s>EUz+Bfq@#(Bj?j2rvjy6o~goo zkcc**zm!pb5G6ao2ysrqlWWhxg9n>pn`fq{gFqP(;vxBA>%EGf8%#CRL_jkYed<>Y zhX#v!baI!4E;b1n6-qJL79MV$zxux(6NB0KpViEQ$65g|+^ay=@Qrl>2Gg{tt{?hL zFW_d#YMUbtK|1p}PqpsXFN(r#IbMY{yexAGMMhIqjf{_bh?h>X1dt2@!x;y@`rk*r zmIL|mQIc)IGNyB?QtqH$L#o|kG;RRy%@wwVLOTQvOnNEOGak)bO7=dOBfpvVAFL!1 z+@yvan!v&kr^*^#Stfr0Q(M9$$Z>-Io3+lPI>Cn$pJFH;hyhUie2gE80aGP)0o^w* zJyE0r>|rg;R8>WdO_e(crGsgVs+-}|f{6@KP%>NIEOO3&icIIg&U0y&vye~?Rn8{sD+P`q*GvXZp>Of9was^n$Y0n) z)E69K`{fS*1YjA1VE}&9uCK{VkLBmPGs%QN3}OAtC~f)?n5)AMW#$m2Odh?JH^W73 z6EHbS-9hdz*En#mu_I+}{wxEEKVk_)wj4-37jA==&MR{n`N#)|SAC?RsW4{4oe$pb zmbJyXX{qoB(w&Vy*K8bQ>zD;EYh9eKWli-up|zFzjbbo%;`wyacV{svZhrWdG#YZy z_MC36Z#>jNS__zLD^3e?UqSca9d&ijEWbTLx5!kZYaAGYxeHj;S&YPiO8*I-22lx| zHv6H7I)|7b6NiH(<=_JGkni?soUsvxUA^aBK>#pP&gf>NVT49IK_ctn;N z?o+3>6XzMg%|l2VmrzIo53S$7ibBgG@$y1=lZRT@0brd|{$Jf-!Wa`+$O&geVZaU-!P$v1IpD+ZRq2>=yAxdhgN1oZ zbdf6q@OO=1L7wvkQJH*e9Ay`{2he0lggPt-4g>*hCPN%2&(#4KAU#2i>h~ahcK5Oz zC8}pEja(yaUpp*?7KA_m=|x6$(~HjyuF(jYxg;ImusstX8x)V{kq*=C-o1|UI(pak z-aOKpcjPP(6+I0_@@F{d4JmFFUEom0!z6SZVf?*$lA4sdvN0>h;*JqsMO4xttPV)H zI6372(ft9a$b~PsCG;c85a?rYQ^-q*BOA^L#|i{w(gEcR&j`>xCHekD3`}ovWq#Q7 z5_ow&cwz3M?(G}yj+GqEh1cS)b!X=4f+4c{Nm0bcXrPRn&usJ{+LqpST2{hw;{as? 
zHD_Dl@J}&C1JhOgS?6~zi)a)+Kia*R$Z*^Qpb5~K$jeCy{!3 zM)#FO`rwVi$$j4eov(xf;Ggl$h%CgQ8x9E%UKEaxjoCqhgu`MLY|BX}5{@0{R5v@k z#N06?!FiCitwx6`Y2CsZ0a>~TzV{#^2MtAJzDQi2*IcP2Qa2*LK0RGwz2K5D^U@`# zWPx{nz2@mntLT^O=5Y-K1WBJAO|zn#1Vl*q5l52t<=A+`E~U?-OgVd zH}2f(V zGh4ki0cCwaVgJ1qq^$^(3{Di?i3LYdh6cqQXia9iP7g+Mh=05BITNB6F~kIzS} z$WO@KQ|HF(3>!^5H4$*Em@2wM0~9B}esz-+3P*N>hB^6<0EGyGiu=rG)cXua5DKBx_a~IDTa#;;x(gfZj$V<_CYf^Bn%DG zXweG^4h{oW2ROk`H@SI5KtjuK zJX*PrjZF-zLmF6=0mePuwwh{(s?P&y*LxJmK)TKv*FO|VNz(ZmAR#5%ml*w4WIE5=LI^F01%j`hndTF^9OjAh^UZdo`;DBJoG`kV7^5iAZ4pQ-T!z z2&0GciV8#c6`N<2i$jY73_$={sTLm64vYgtEOUSJCKwxB6F)KQ2@hSu7bU`EDUDLT z_OpruxdbR{Z^QEIDj@PvJe?kIb^oDS>7DC=5!Iqew?Az_XbhPHe&dpa#0}gnaxV%w z(yo9`xpC{3FUZl4aiv0OzsA8w%EO-t0@V-D9*SEdb;f|ahU`=7XrCsco(znF4vG4~ z^rw>{bGlb);iGwfiU{(`GSn}I!v(CL5D7m@g}y{uXHasgvnw*GaZOlYQtb1WFROOy zr3dPloDxBkUus`sT_tAKWLx(FvGOyj)ii5)&YBK?Ws3SOd22RAIWT?la@cI2o^=Jt z%dxzjdRu1Fb_-eDMEI$on_j%H7?sP_nG#@>99i#F#nM6hf|YaEgwkUa$$UEpL8zLL@8 zG!m6V7FGz*W^J!UE?E}D3*dfn6zPXnX+2RrVFB2?x_KWB-QgWVG|Zx`z7K5C1`!Ea zMi@9Y$LG(l-MHa_vOfypN)tsZ7LIgyA2a}CUsp19z2xtQa4>QUfF+mw_Xwsv1*`#; z3VInKU0q$=tzhG?W4J~*q^Ci680@$T@=bB>tjP*79e_hbnZeI$9--;m`SFGcyf9EE_7yf_=YqSSGd-HI!=|vw= zCCu9#b&RptG=M(w&HRaT_w|jaufK%|q5;#>X)OOW424gA0&kf^;}KbCXdw|shIknp z<~rywcNM{vmh0Musju+ECwH!2rpQu-n)13z=4aW zPMx~{;~g;-3C3s0FXm5OlL~lneIC( zum~JTY#baVS_4Zc&mrnxf0BP;X*IuY8%kQIjDTiW2iC2iR}1!|U7*PvLlHpyVO%DO z6&DJw-X9^v6vvn+_k<<#s8aIFnqV_o@#b#6zJI!50N9E6G1}JNPS2>1%q>HA0qQq1 zgpZbR*=T>(0|!1-KEz>PJ%TN0!jV3BRua)o5C@PyiaSYV<)Bccul~s6%o;5x8n|7e zyPAqTne?m!f@Ti(4#@hSo^Ob#w#IbZT*is^rt;Py6MP?r(uKLE1ydMt_CsXyg7ws? 
z8t58Z#l*$OX4^TfhXl}}tPgWaMJ z*)**RMjmAS(#P!Tpit;d{Bc?@elx6jf4slx@W=?j3%Jpmv6%KJx4z_Vz47KPy(#uH zd%e&v>wKgjCM>3yq|rY%g#c7~nKPqpJ0=RVqN9ED&YivFdl~fC*XUP>^Oz!5-~KZO z290y1DWr?beZu%qA!;tAY*t^BSX=|40dcY3aU|cY+SO z(CrX_T(Fgh(5*iBQ`g17o+|c;YGP>tfPcDN}CI0`g82KSLH+S~dy4ImE zs2po!?miKk`Ev|*fvnn}X`Ze~GFbWVcf5l}K$ZrB3h`kRfm8%^U!h(3mI(c>!EJ4A zF+5pFhZRuj{lG~e7RO4rCpnw--@i`P5x+{Zz;V8f7IJ2xh3ujP)Ae6yPrAaHG3q)$ zo|MJL&#!<7P>y77dm=RNQ#}|zNir04fnI|+WtD<96}Y)|6lu`E{yf_SRp3vY!tcPc zY9czjM>WS`H^S&Q01s)u7}nBHgqZDBDgY=$O9Urw!PVxCnk2LB13Sroy%9ylh)GMo z!K|u}&dzK7kQ_{R3NM5M@(e3300sef$BNM50|oWHbnO~49%)MkjCzCvrsPdQ$@xxL3*@mg*Rqx1#Ik2010 zJJ%PDkiSl{|1xEiojmR}c7&)F_$^q-PwUD21PP`k7GG{{z<=6Adv&HP9T6b^l)1*p zKc8Oczi&UaRwU=YA2nsutTDNa{KEhGb9C4L&s*}+{=Xj)Mdjq;j;~thlD`iNfAlVu z_}tO)I-XmF1NmaS_NI(`*DZcun-s>qwB&C#Ki)SVvqd;!L*_zGS)2hC4oiPQStkL! zi)4rp3-}4`!tXP;d3Ek7=PE_&cUl#J#Q^(=80(V50HB1g5X%?9%6Q>fC;QS>uRT3O z0kHKT_To{|aU+@hUQ|>GWE7{M%7N#2-7}4%k*nK}{{4oo0RhnfeS2}*=ucm@d3Dzx z50Xy)xW&b15Pa{(76DL-Qpg_sI=HNeVfy=Kww5z*{L(sf5WCg& z&8t_x@m7JV?IyUXNLO=2aObF}D@FvQ4=moNtJ_Tk7*$nO#awZgHIvuoVsIkGY@jje zOkI$4P03)|d%J9x`r^ln;@{j?FFrl#6&FekAzR%ou!(ups}s-m>H1F>@!tRb^Z#vI z{omG50L`Hfyq2qkrn>X{Xy79&9 zrcMXz=lC%8n$oL~XK!=$UGrx5Y@|$U78Za9#{*m}#)wmr(&5AirsYUx{*%9jo<0QU zi3Wap7yVuMqnl4|A~4&<1x~$FK5h|o{oqt&3R0{Fi;-dyqDjWoTd!X#gGqT;x%x9E z6iNKP51#Dm%+LiO5(yYCLK4xo&*ZJXoc}v4Fvl(~R*L;bZue5k_bQfQgK4K9sUg(V zKH#LS(@e-%>nC1fcCciKK_hmV@di00IW~|P3Y8!esZf$ab0sr*3pz*b=VZv{ZNr~3_fd(L2#j1o?SBJ8geoMi zQIIIzAf?ukhvbvKu>5bIgqZJMf(ma8lL~rjwW8fwZa>VWN7cJtVHO7zav?<@G!_8} zCLlzeiU4pKA2|0!Wlr~G=SO9X%abZ5bzy#H3ErFH(S7EaevzU^`F0`lMk{r zEVK*S`Q5>wwfF)%6P;&1+bT&E`T&4Dq);@*i* zMmed8jOUJWMTznkR;ir1qcM3tgkZj6%f-Fr zJ~dsVmn>|22l7HMUa@|<&z{BaVczx-#!ra!L=plRtcG!M|6~{Ybc$U^ZHyW>2yF)P z#*L<-qymLhfX~EWoNUs0uJ9V%`bnHDBr>rdt9UDTcRdy~+s!1ogJ$i;?!p|^F*KcB zEw5EZ3wQz!v8DPviqqe+nc=^e4@4Y-wF!G&!FYF_ZF3Wp|xl*jwGy zg3z1Y-Q9{zl9w)(Lxo@|42ye;I{GH5uA5YcJ(#RXif$x0;Yi<`g=4s9;hOprA>K)_ z3e`0tKi!h!=1ZvG5=A3x$i96y99Xk?KCA^vl0^59A(q<$57Qs%>!0WgT%bRxT&2}- 
z`AD^(v~^}P^X`fOOBZ>wA9=kpP4=8PJAe(6ZYwS8jc9>of4c5=N} zQ30&*X`h3ONrPJF_-?D*!8UUiv6NFOxyofDh%qj~OE@nh3AC zjF?vQ6PqVPHx32QE2e4$g|m!HDRzDzL9CoBWUrkLexwlB8T91S6CwYMI^*uzubbob zM~p&LgL_{mg&sPQapuR&*#3)IZR3e~57#K|JR9Qf>GtV_aFi>{eioK3THhk7-}#TL z_gBvsl_y=kIa{D^mHU^F|9&yY=P%f{606gc6w|NsJIF(^*-O;oVt|MbkDFiu;Tv!7 zaB$Dp>nu=lCVu1a>TIu#<)mz^HO7}w%#Ri`h}VDV6#E;AiR--+Y4neZcQ)v&PrqHg z-v1rGYzl|H8BsF2jS7>K zDjOtzRx4R_>pKhXOk_}OJ{BVOM4786^e2?5X`^*tpM3Q8)y~mdZZgp((WJ+;*W2*= z!wa=?iFwQGnZ5KboSM=(csbbO&>E~1H0eC?x0==zD+hh=C|Hg0#{@`Ao#{^-`)i-e z79rS7PiVmuBk`N$<;xXd{tSSL>_1kz)FuXt3(tG;Bs;&?ZJOKn&WU^Sy(PG&J2SP3 zN!#A@*v+#KHlNO6Ww%4!dfD}5r~08@6OWUTP4CADu zmtTrrclUx|c!-nd!S`}fk#b{hM!HRbtF8UQx+Mh_bOk=u9OK~^xh50DA||68d!FbgG&)6+w0Xqg2K?_}SUfg|{@-nw|LPp1a5 zLKf=B_2vfqCTe3fol=d2WlGbzZy&P0wMS6KC}Org9eJKt^37bC7ZcN|3jLYq7+pjL zvf@sUCOCRJY2;+cza=9!w1EqUPi-ty(!Qp-&cY)N;)3D7acVo;ri7SZ zlW1tP99_Ls`vpyt`>p};-F1c8UV0mpI_VyGa4tyrP6To9X`-2I=~ng5ov*|G^E3`u z>HSO-Ch&J5^Nx3TH$`_zH#56)c#mX?T3^ZAG;_tRiH4n5za3RQ!oOG(oeycLLV@_E zzKU+y7aob*-TI>+bQ1fR9_zRMQjD*u3Ab-QtulI9R}XKDuw|jEg-uWXylVTHz^_+t z;ZPu&s<2-v@^y(ZMx6xg7Cu$>M=shlb$vUgY^VDA?$S}nk~>vX zb=Ky#}^4n7(l#DjO(YiAjf3s8?}>P&L8->k~MGDn~Id3+G`WErTewKc$ZLg zr*@pw{NSg+!xs5aO588ubhu`BV!!o3NuQ5^+8t5l(E`yJRpG&aw{Ha}1{(K-<9h#o z=I(B6T6GZ(ryLYgms&meSQ8hq4P3|abzTo2elIBD&?qYUJTX4d{Os|MW9{phj=t6$ z`?`N{QWaf<40fRpPo!fsTm0r zJEzm|ca#!^Psn8vHw;dp=%o}8V70IyyTgDx9y`l(58Eo%i;n+iQiVu z8h-_M9~(|isnJIkCZ9g_tW;RvBT6dgKru<3?ZtgGOUezcbo7iD;R5QnKvkk#THKGp z)stT;)g)=f0}i@8*PFCieMN9j;sfQitf#Iq&698ZK-fp9IrssO`G@00gIWqbWU)}x zmfYHtTE-X323Jde8U6d$ox<_!ZdZi={&ij%rum@1f4%niGFBhK=zx<=1D02r7JvP; zvJQVeSG|qBMGbT-er~c4<0DQ zh1yLoe(JkN%6%m@+YZDfnXfx~)r0AK57hl5O1hrtR4e+2$SRp#D3(879d|51)-x*fP0Adq>U5z- zq{($JH;&Uzh}p4JXAa5QyRG7RIJ5e*-DZ`iOU5=@tlgiKbY#Wv=ul798ps*pL|% zzAo@i_RPE8z7ZQ#;}egpnBBZOg_ilKWdALVLaVb=-bM79>NT+eu3nurPNa(K&#Cp3 zyW;~pXuU`SM>%VqBuw!vK4xiP*tac-%R)0+dHuH^GEdnaSOR*R4+my1Ur zoCS?}7A7oWXk@e$b=mJ~@-`Gg$TM)_h$D*&wM(xPXXaIOGFvt>4l;-b#7G5aY~Yh& 
zdY`Brud^^H8QjJ}sXrY#ww?cIDeXFfSS8bH8LQCUL7Gi!G6 zU-ns;zSZ8=rt!`6{Tjx>gL{1hkKCwG*S_WZC^l}aZvM$0r?PkuXn<$SpiMD^RQR@G z4NINM*Xnmb`6#OO zh^-ribtORjQ9yj2X8k0)!JXtt?Fx$7b(uvA*`T6%--+91-5~@3tzQk- zm9u!z*X7;{?(g;;u_suDzFINISiP@Yock-UcO|PNB7kwY7Ns{OU-mdy1TxLP}3AQXin+u zP1Ey~nVezJnJy|VbYp2Z|C3T4nzRia0trwod}v3J>v`FucE`%*&Ni{-SV9+r{pl3K zHY|HG9mBKYPLQb1Beot}T8^v0VyhLVGe*lV6Fm(Ghz#A{ArlM`IP3>o2!J&1KsVvv z8cc}h=dAt*js4NsdX^;F;LM~zGl88BCfvMU`q0JC)fX;Y0LXR@my(JghH*?gq2SQ^ z6?4}vx(3rU=8?D(e_*2L05A$wc_46^lQF`VRtb*&I0s3_j}5}Jq=kUjk~eG_j)tRq z=PZxbkI{4{pD>QxM)_`xDz!ZO*i_Mk6m4I2MKq-Bvu7i*iXCB%<%FnS7)M5G_nh(8 zuD_ad>|FF3ljRCCr!S&PmA&GwA$Hy1%9YQbbk$T!x2&v3D*Pp%uDX(NSzr+Z`GXf>%D%BIQG=*R`JS{!>$! zC2JVSXx_E(rVI@|I|M|>da6BH=Hl-sm>Ks4c9#l8fBgATb8s;5eq&Hdqt(cfxsXy# zA0BmGJL{#N_QYK>_&Q+PHj(qapkQcVL$`{vJcKh4Ah-r!2(Y-Xwf3p!YF7I6p~Xyr z_bSt>r{QDGdG!g()Q9Jz2N*6xkX4EP{*19I1-@JN{F5HGUbF<+w?oJx88BbJVO>PT z;oA?sE(aFmBAsPrxeo44P&8IRt^kxw*4@<5jR2>_>>GA;pCA&r-}cvQQdbe~y#YLL zh++U>a6)Ti(-EZe1e!TdXk2};(;L!qLgbqPXq5r4PiB}B9v))`Dxtz8;+T`O?ti3U zB8I`{S6!hmDH@?;JZ`y{?nCB=R4zQt2QOEr>_lHsGegP0AL`l z4zH=;Zkyf6XEYktXgSiCo!^l@`;nzP;#jHlm0J-kZ0sK=jQ|Au2+iz|{1q!PBCPl> zRLM*+n$tw8&`L4zrJrP#7H3JhXMpuJ?YF0*(?4iRG0D!wuaHk}QxO#GV{0(%yltjC zJu>~URnlj;seJRd-Pm%ioIB$U+LO#o=d}e#zOQXq_?1J*v`6)--|LektLq>5c|^{h zsZjC>-@srU#Gqt;0f$a%PpnvN#-|n&$h$I$@cpS$D&AG$(dI z$K~=duxDIqxcWobwYX}5-<3&xc7jzZ~hALt>0j!*`!(UUQ0eZg}d4Jk+~ zJjhxoM$0z2!Zz2-@r4eiioAw@eqD2o2Pmz+VVV0H@qj^xf`KQx#SN9mqcoVUe)e)^d<27MI8bxW z@K`!$RX#5F#}9pAQVM^D-lVdnPDSyvh|xA#MgP*58Zf`EavX{T+Gx&DY@0dXh zIx-^|)YE@gGzDHVa2KBEkw-PMM{8QU>SGE^chZxU33z(fqmlRxPsf?+cUSM7y&@3) z`vuedQ8we8$I~{PS9aAo?KOV<+q=GP6Fc_fb#xjvw**MX(EWOMW~rambhoN51M)L$ zA$On7pOFf(M4rGxdRlV*(>ncuuOZ@LaIRMf#5mK1O7krzs!+p!vR~)4@B)&sV6x9Z zFA~Vdsd$9w6UcVNQECQSyl{56?}hR zSE9nD_>I`A{>TlV?8jMfq1!pGlznUxXc(K=4x#(^!o@za_*pk^%Q&Hz>A^7-RH@%C z7WO*qS8{Uphti8jj-~etU&SnIy~9T|!F?zuVF-78-!{5!Q29mhv}>)DJZK?h)z;UY z2+_|jE;_Rh?YfAI2VDwO(n7kPI@K?qRX20H9LB=R+WRN2vKKv 
z5=u`NNO@~}1+=01@_|)lbMKYrArqGHU)ieHibk?Sb=X*j7T`=L@cQqyJP{3cu2Ncc z?|F3MbQVbQC{&l#TK$U0d_GY>U`zIA)yn1X$NJyu^@eCxBvzK#hvfpocQm%$q|J>5r zJaHdUSEIhc$juEtF*46T2H`oZi5ip(>aOsqr?Tgp$Z@f*uiTG|W-jzEEgfS#X>8>ya@U<_Ee{_4!Kw5ETV`$99z{ zEn<4Q-RlExER-bSE7>? zCybZKLV+q){#iwY9#QK@n_Sza0O+=mkZLsdZr=29Cc(PUk;(J2Yi)vFDDLx;+LhUb z;;i(yT6WTNoiMh1zVUF+(hFRP4#F$wbPu;&26#=RMT#cP%SnsO!-qQYI;>;t4=8g~36x}u+%l=jb3sMX|lhze>&|d_QUfbGBN{4lw zrt48FU8*qgE49gWz3zexbS|mTNQHu+dyzVrz9z|3Z=T;ZSd&vIw4!9!yVP()TD{yD z)rt<~=;Fcx8ExIKPjygvT+*rf5vo=?Tq*fUDJHA0j&>)vQ*ElM6HEA+b4`5p6+5u>RL0M+kA*CZ8}X)IkKMOxA<1S;CE*G7}rw83UX1+}7-PLdVe@ zwSoUC)2nTDe*c5L_YTLp@BhbjRjR9`l8B^1R!CVHr%KDt4jB=$HyNiRqC# z!Fh+HT;}g7$+oPt5iH0WP~$nbmm-E$wa(*AM^khjU3aeC5HoCG zkLSLhO;;|-pHA#({kG7Oxw?AzR0vZNpESo(|AxRM_0~3!4}>2dH1mQ@(u{HqAW!Ml zAQm4L8@SdHJ-{phx|^H;Mq!oosf9dMtv5_dKReq~Hql-R^sRc1i>LjjmGjFWnV9VI zImzoY#DU`s41^V)q4~fF+wpHl?WWt*QtZV#X9-8O%(rx2)SP)9PPO!q2DT6b9a9Gk z`4O&*qcpUDL?a0>?9!S~Mm(v@w?HQswDsT`2Pz+uPQx5(MKHx7T_!33uQOUeCKh9;fbS@RZf(MhSPteY@5&a>d?#VJtI1;H7&o{x1L1n&is)cg!ll$D3v zn9ae{2G;3kza5t4YyNV`+}t2xiYjh$l0-I4FW^QTqx|!t-b63zLT^-5fsX?n+~44W zo%ID5AHycz3yIUCI^Gf)qQyAz0AsYL;z^Ln^CzH|Sm1^O#p=e?4>c);D;C5 zv0#@#nkm=KB~=)4nfSH4z_?Q~2*Mjb=?1nyt;StC_q5y0ym;z@L>rQ8g2D2*^UnFh zmw?3#LQF#%*s6PBLNzFMmvvQxK~odtsy3TMJtkVYX9r4dW^{<-Dx0?^^yVnKUCqn( zisZi+%oUwFVi=h~74fO7V%sjF{<#iCr{F&p(WP=sIEtgoP*h0!%}xk(-0A9vn%s^>TTRkG7PlY5%5n-qK&KW@yre!52xjvQX#IlFOg3O!IA z0Tz6ve~|_+d1Q5uxz?dG9rvp!8~p_aQ_&@Z>;s9ExeAB>in6X}6I$uFu_NSlI}J>Z z`f_&x2l0YF9UGd!4gPqX6fi+68VtcE^9J=}_ z&<^So^+4}|B;Q=+tGXcWmIh0fA}DEifqBXU%{da)fHKJib0Lf)i0Cq}WH*JLO}NrLayV`#p^y3~nY_)aoVCNzy>AYap@|>AFF5X1MBd zlP%4zbE}HV_RFd-@8h4tU%NYf<(BDGNee2!tX_65eiq&C^qzG~6)kb#08KYbjLXL@ zz6pLid~4HeMH5KK78`@*jJEm(@>~a65c&kGxI6%xk2jL2IxZ)80u)9~PorvCveI*3 zLczahx_TM(t_g{Bd>zmw*v;5}Y(8zBPfmeJ>Bu}Eq?<0m9yTYb5GyVxxG->8r#j?S z?f#_KY)K21oQ#L^G6%n9cLQ7|h1lt+hyo7I^O}LBq65nf*6UOW+Sbk*dftjmlV#bF zS%E;8O+F!#Q|TcVZop8mNOX4Hkk6sIN9*m!8khDOdkbm0S!|T}XSMPT;j31IcoeYd z6A+*RW+l(;fimX;I*Q=L*3dbPIGyX>UKhOQzitz8^cPE%s^;k 
zaNERd15TTf?G|+7PQa;FP~>C@3_cNuV5d&-tRZo_qMWF*>H{lJB=X7bQ(NGLKEV&^ zuOTnqj@YjTOIzowv*Puow_@pWj^-?QWvV!a!F(^d z>?;pVS0?AV>`XU|ue5mtJ5q4XodrDAynvZwau1`uQlyYt0FKh#s3IOy}}%Wwl> zNfASyFw*Wi!RJD1Fs{1jq%Y@|eR^8XuZv;(lr6|X|2D@Hlg_qt+1Gxc!#ulHnX)Yz zeMxYjagx%F9>bhV_e}b#+Wwn~6J^=bsev5yhiye1o?RalHe7p^@_;8lG^vzlC^2V% zK6pOb^O}Rs*(0n0E-Tux6fw!Xdp4cN*>3aat$ii{i4*F4XYe{FegkbiIN#OM7XY)jXP=kCek z%vpKzcat3v38s5mh~@k1;SHkK-vthLVx{T&b{L|Ov5ww+&pcs6Ag5OAUK+GGHA%Vk zMJLMvU}BJOIXwQ?*5x$JH_biuQ~ebDw{OwHKieN^L~$Lcrr(qr`;B&%4gKDp!-zH7 zPpOEQGMmE`G471OL|x<{2e{%5o5ku#Roz|}eRb@sl0W>SHe$C{TdKUyg!dlVPh~9- z2BhBA|c!1EtX0dQoZ=+7dl}uI-h!Km^>&k6|P)Mp@aY2cI7# z$LtUWRFUbFZt`$VAx)e+M>zncL9<)Pvn3tdm zfqFOr#PC;toA)xRTo+~W3HuCuah#3P$ zJuxFFW08*ywKO1YS3_$Kxb?>D`oINg4Eg>^uFy~P&utz1@CL9GPF`$0t3c^UD0O9~ z#fHkEuuM=iUl)>(7zlCJx?wr3_x*39w z4I4f^l9})E_iNzyE<|=lFkiES;MdB^>hP(vU+W0)ie;R&=_56HQoQq5H(=%G?^5Q` zd21P?bhj<<8OzV#`YR7Jp7#PV3Z0ac7Pvq<=7M2d2`u(UEMLIY>9)_RlVz1ueH*`u)G|*q14iINoT(q3q}AP57Y}R zuOZAB;?R=0<$2o$p?F%*&YECDTXd5kesV_;279UJ z`jLmhcTIK(d0|02g)j7CPpOS|-pPmirYp;7$@*K+iYilIaf^A__pXUSE>QQ-GcQ-W z+w@14fDrV9o!bjjXwloXw@IqtkdzhZw-hhR#;%TdGfNZ~E^&3wZO$7n=06p6<=olXsKx;u-n8lU19>l(U<^|^-QfYb;#Q5 z0+;zBhy{>|+6nj#UT~~?2heR{S#EEj3u(@qKpFdkM9&-IJO;DfA= z=0?_bBwB|_+*vVhTUWnpqXl7qm`1qhv+6e(LS+(DJqj(Fx}zp8b#Wz zeNwf*3Y?wpyxsO4e&yDs}A&5*vXA>c9|GvGxW7vcF*l7l+ep0>6@k1`XYiDwEbXk@61%C^gnG=UTlw7C%QJ+w3`Xw zC;KekFje2KO*2m*@6C73MK9l2ZSLRZE2kOpwH6QZLAQub2~4wafe+Vi4+lhfi=RFp z^3i0^vOsUen5w8#YmCJzPRuP~l)g(P-@jHx-Y;w#gg|z|8Tk4;=3THaKJL7e@o6h` zDc`N{{Ba(Ipa#YYnaSl+x7kuWL(_oHeaBgj_UUJ(4guU_We4<})v`_swo>br6wqFn zz(OI@?(Kd_qjc@==f2{ldL3x1lD3559%ZT*w0@9Ju&bL;W#`WyUGr{limq%H(v%9l z*abuJP^MURAGi9ayO1cS3J$wZ09Psm(g2#P4E_&AaIq+Z9fZ%IG3>Hf@LWDt0x}7_ zVa(R(!mQ8S<(zk%O7Fd{n3RGJE|xH+EU0@%MA~dQWW2XO>F_yks-;~@Ot5-D+=&wO z8Y7vV4Y`nWD>2_Y!$$qS6ZEEd0Q3;F2sZphz&DZmiNm2}anTmIU1{Vb2NA%1B9D?S zNKx09-xQgF;|VnD)UZZDrmc0iK=>!)WX*_zjO(|HPls>6SD^3WUMbwjbEq}f6tmU6 z$3yDaPbBKz0((h}FM38`e*)ABxsL6sslnmaV$vXqO~zUEsKhOQ&0 
zL-u{z9YZmVC!ysmS!K1$C5vxp^t8=?;P4|Mp`kP#;inB%w15W^H;l4-3j+{i@mVFZ z5HP#!*as$UWFZeqL_^J{kC;&fG*1dq;HTaeDZc4tl5sAW>2SL(tuh%PnqQOp?Zd&A zPW1a54=!EPdzobQW2JbLQ2^hc>ov97s^|04R~djYpMboAeL~zUs?PiyKW0w)(3VSP zhNd5^(Mm+Wzf^jso0ZrjZaCUiH=KNtnDX$M*W(uA0LDyv9#<9Y<3@IDhm$x-lM*>& z3ewA~RTT^~2BQLZSM&F6JX9ch?OxiY9VVDhbBp-b2yU6<8NlMQ5PH@AZJ{PrbNw)B zq(r}%5F_w&V+5!4BxP3$ltl`#L5Xc2K7R*k0eZpuxHY@IB?rE_Zg7{T4NNpH_?TsMi41NPs~^pm ztjjM&y?S*<0^Z1bIcD{JS>yZ{o;=>lHGcIv3tlojDUK@7;m!}-lE+=M`(ooff1%NX zD8>Mi8{`3w`UHlc0n17XI*})qNks_|qPX<;_F>2x>;}{Gdn6pssXeWTfHbhjstUH` z=5wzp&i{1>B)V$JgKxMy3VrqGpkzD%nOEH>4I|^=D?rcHY3|Fu&A~gs9x_Xm*ca)Y z{h_|6AZyW)Fnl0id+nPs=~LF?TFlX4j?%W2sIB$IgrG!qp1r}#7NM@0uMH_$1+?8V z1I5;EhDvXl+!Sz@RLJPc}4Z_!__(>&wS@1p+!1cP^j zTtyW={9G9*&^B59yo==0$`USswly^1$$^a*Nrv-g@A`+vOpSyg?CB6gp$y9c=Yr45g}*1W zu;qg}azS*wvfT-{Fb#zdQ#T!MAK<^unpL<4TCU^kv&hAR{CNPV#RJ5X(7`|}qc`yS zoRCm)V}+b_V~mOCEp0tf&;>>)xb2&2aU2BT37Py8Iz+PL36)JRg_n1cu~P~R<+T*+ z#nd@Wa0FI3WL)%Bbjaz?F^Vu-r@Z~aVP>o`{RKR96UZP#jUF$g0*iv5J;|IJ=ZlP! zqy>5p^LuJpi)_@qy8Q1o<#3TU0ZdcgZ^Ul9dA-Uy`X}Tfv$N5(p5q9rsJw_#C=4v>bOvkxEN=v#nVGBAP^ftja@>7|INb% zz$b@O;YxtOYY}^-uaClbND$R#JNeEK+@)%}?Ao0~PD2F^l2hZMxND&Z^f8ph_l_#sbcfP@iO1YKuIId}M`ljVCUefLH$Rw(u%5RX z$_gR`t(c{tni#Pg;@*c;;4_9YSuRR6HapslRM)mp8fw7K$D^{cjce8oWbCQXUJ4ow z38&2nH4ssqDdNjeWrP2Vqqy48Y9J>Cq#knA+zmo@8x5SJWi)++olnhv0E<}bWzYlp z+9b2&Q9lLtbb0v8sQH0}>FhZi$O1x{0F+Od0?8MgDFGW`N`xXU&IQ1o=yzr!XZAFLVi~TJ!CJuvzzaUD}@9y==lthR$@IQIwc8tsXOPB_j ziQs{(NRDIBX=kHMB*N5l_b`49&qfC&f=}qU39Dwg zuBXUpxe=RN*qom>5(+JtvJIiFP3L7c7M9vLMle;Z5?mj+t)unJ)1;b|N8+hx^Va7J)P?_)Dkc=$E#wMgU ze0AETK25z~l4ZblpflX1x=J^Db}%E5$!H1}L+$J`6;$oHP|7VSQt+g;HYgDgo*%yq ze4zgCcDLYsql`TDM*7Opp& z3&%`E`T6+rdd48aqb=JN!g0Dc~@r zHlY`jQ}`mVXB#Cspw#3j`*4))fvlHc_5BX`dJ@dHM=%SVg2R0G-}jjWuqVo@N%4VV zubGy_^seP}BQ+XnCiIhTgHY4<6OU-uDaW$%VHwVKM^7Ppj9PY?pp>@j)~ykI1aH&5 zgbV(Q1KkaumQpA@d0f>iRKQcD*{7Qwx@Ag6Xzi}>YANl1fNGLQGBHphzh%PaJxL?* z=x_<}?@?o4->11>6wnwh+0K29pL>Wdht52Be2k44OnK1!mqV|1^F^170xTNug!xe@ 
z*>`V7?0W~Xtp_dTf>JhvTB-UOZt}IVQA@oI+|v!rF*N4wObKapeCF}9<(tJl)Qa!P zzw?{*(KurP&cbZ!crc~m(*`z-mqnt3v&GdMwcG*>X! zEe6%Fuc&s1@N8)Un5s=+E@!%WUbxLYSiLy41jo`SK;i0ipruTre=> zes(!29kVef4qgH^0Owi(tU{qXU%Lfh)SpZ^Z!3}!nnoM)Ip$joX~kewHhi&NHW&C@ zB;ELVvLSv2qN>QqwvdfT-W)?CBV(b1--^5m>k4e0$G5hjKQRL3p_bcw4o}!X?rd-1 z#5y4eeiPP3)MhUNj+qMYbTLBqV3-F`i|uLJwI^QpE~+tKP_K=*Ud%SJ=m?GB?o@lW z�%UI=hMv$g%6&8*o~_2Mdtv#+LV2uQu*bmpBzOb(6UlXM2mV-u#;% z+4A!7B70X?{J7u&ZA$r^rISO9W4hqdwZNt z?dKK{fER56UNp{>=}K9o53lVpmpydm)Z=5X5h-ajM!u;xuS}ZvZL#aRD$Z*<{#atK zf0%Rr#yQ7%zF}@d*0NiDy8dBou#miU+rAU@X?UQ(hfv_0Jy>5~-{vs1;kfTPH)dNJ zUhmGUmA4<`0tmf@BQz6}k4#o3mq=SBvnU0w+%Q^i{f#K3z#gZk$&Q@4Xx3M#RpeMM zpwX43XsPN8(M};w=5;t<D0G6m!Ds=&zl>3`&OuT!p!s3Mk z2DZxG@mw6n-RA)$3~%&^a+=$W02J9x1W$p&Eqsxy3`zlC_7`)Nz1XcZlFo_)jA z6r;V-a2r-EWq4X(<;?h6i^Plo!2nGskHY0p43NZLydxO&RUkph7PP0{quqz@OGiAdF}k_ExD%2d0iyKFY8Ob8)CXh(u>;90><`=?agfT z1Ay;F=}JPd_d=*v5_l`_QPAJ6@unaAyN>83Z6OmA_x(a#TrU%^5-wOct@dTgavH+24_eS4MRxwk}jNErUfX?wy<%i$w2ggV)#YiHY%a&5Zn@ zL`K(*>!3NsuRs=rL8shwYYJ!tt@@U^w+hp7;hkb^HrmOugjN~I3BfQ_uv84I9LvKe zvGQ|q!6N756Qop{!wWfFK_Tkg$~1musU0ofk5&>y$)C%>^HXDQhj)?kzoi1~Ho-6Q z9xGSeE|nF{@i)M%e5whkP`YB+BoA5755e0fYY(tX_xm-8ZRhdpn+v@a_-TpI=~=h3 zn^&S8C4)X0RdQD^4~+%(6$AY$_Q9>9>%CUe>9X}y!`Y`AyIjkEQW1X4`*vk&Ccv9E z-9Z$k*3JvxtjLJ(NL6e2nk?tG#VNubCU<+2XNZG6PfJwOvbEY%45w}Rnw;`vd^>5S zJKrB~MDo)CmylrZI)3p@dFDDG@Ojn~lBk5cwC_C* zy)5vyqMQdzHqO)6?x2&!0)!P$BOPjzD|{T(#7giGY8BcMnthJe;o{9lHydeJpDEw{ zL&M_%CBs91ublEwv`_cGOw9I49yMt6KbckBq9~brAQ8kOEGL$EKWPc?+9lP}(!n-R zGQHu;eQ~+>U&_tQ#Z1wQU7cDB*4BB3mG*WUV_gS0R5F!pP5sI@TwX%K>$Hr|Y)*-e zhkAokv-Bp7rghF+*X3G5|N5%E@Cr!8CQh}yWvae;Cqur$@2LSqC|{<}YrF@sB1OT% z+JvD{HVJU+sy)=38YGl3BQ&b(6E5cPlc7seOniIPBm-2@<$wj=9Iszgd31NPO);iF zd5_*PXghbKGS4eP?XWmi<~%_&O;=pEx@gCtkf(Qp+e4)n(Qn%B6M@EX!J0QkqS- zB1!?28*)?Kbh7QF6x(8znEp{v(Na6x(4YBJgd3Fg{^5BY@Q%F+jZmCfo1YmfkPA}E zP$4L^!0w{n9*r&U(F4il-lk}`2Qr`JbKfRqh3$5@5_k47*h91Ev)}Ld4onIxC$rLK zaUbq5b?v(sjdC#|-l{_{P&65&mG^)qyLQywrqI7ft#A?59$0HD8|0$Bsp 
z{yVSWS89xWT>LmiLA%C_&%COhXI7TaxTWm;@v!k!z5ZVA`6quzh}z`ZAUjK28~yaUMMXQ5 zxl6ZRCpSa+n!b>xxj2E-z8my*%POgbh;>qV(yxL7!zJ24nsM;Nnz}(qaT^Z)n^~fh z>eyc-dPkAdnGbRlf;zUIQ~j+szG>wtr+wns$l=#LM;^Ugv=BJID4j|vfa7BM(QGZ` zAnuvNz#7A#O6YZDO=*njPjo6?`%w@Q9vuf}->LM4=C=B!i28w=zLu7EYWbV@&0T-8 z4QzY|zG1R~G`jw#c$2s-8MNg%&`BC;sIJbMS)2L9!n?Ah+L^z3z{ze7HFfb!_z-T)M-Uo+Lk)X^2R;{d-CL1sV}wnKE*jL zEw4f9H&Xbv>fLE@@n4GMYsn4ABW(SB?En?pvgY~F(F1(5?^F6*1_n+baT`fOmW;u8(RM?ZK&P=N_u5XztBy#FD# z5js=w_vgW^3I;*TYe)_Qh%CMs{y5Q9>oP>qK!lMvD)FF`Pe3trK?pH0g2Pm6t%KGN z#MWvEWjJy1ek>x(07`)T0mL3DIYoCvpb*5r-qpq~y#b0r>wOvwSWJh^!wAg!dmW;o zsG%lx2Tf|(iCg019KjW zLjuX(XOoj+qs}Iz{<&JS$#y#OF5Z%#!E$ekEMfr1?#NlF^W=%&F1%IqORrXK+?VVE zZ>}dD3(bE?QuGhr*?lL9G*!I0R`3SixUxP0e6tiehfRT_%sID`Nv9=If-QrvlTv%M zOZ*2oVSd@!vdv_sskABXsZSM=X~k)2Zp{n)7VI`L1I|m3GJ;*mD_`BXl4ss7TUW=I zX9(>_$J8UjK#A%d>!j>ZSNTgV!bQ0)wS8wYVPeZb8j9b7HTz-v zI~K8uey>KbgKgkMvV&)9Z?_vhx}5eeWeIgS<|u1$jXl|j>PZauTCLbt9Lm514_POa zM6HZ%pMM_RiPM*IqaqqiHc*J;@i%mLiouUVY_(j=FGY$62qSQK&a<(Y6(;wIJ zyb>Z4Gh&T*Bfes4a$L*PRbAU4e7-I*NY8?+kiX>E2eJwhaM3R6@76Ed83&q!4vlM}$A{m!rm8x9J9bx4BISv4?NzfXP%&GSV+ziw zWcy^FQb#L~-2#PxL=abzi|z>ZEl3T26R!2$bh@sEyEL4&zwZAa8}j(rx~O&r`o>_c z3hAIv@QqE}tY(A98iYbrBox#u+YTBp;L5coG0t078Y>P%;VFO`r9nmSS>Q9D3kM-P1kO5h6|`d6dbxj z7RL33!^fI5K7}wfJt0o%_LYgwQZ%_vBwb$V8w>Pm0nm^IiWK`yjT3|HTZ=Yc956?g zTh=zRr^ zkCg50ZH#)EaZ+!-NjCkqU50k8LEO}p%V9l8t@KAmM#GE!s(K-SKo$uWA?6Wl=#Y?= z7L=`=rPU=&Q11MZd}qf1P5>Se&&w-@e0CUsw&5XoZgWb2*%Ntmqg{)2h(B8h{me$l z-`N9a3A;s~c}Zr6$Se?g-=X|TM8a?IPP-DEL0vHoVk?xb14{uuzt4QZ#2LLM|D;*$ zdXbpi;?{2;42UGJ$)r?oNy)IoYTuG72h`#YLzizE+Rmm((8zVX6D669hFKNQ1sc?+ zvddQk!OfQ7sF;)_&;eBiH5jXiW!t9H?*YgVis`*o8^{>Ch@%#hn(vx9$v~d~6^HR8 z2R9pxFr^%Hu8=kDl%uLol2aVz;zKOMorat`BT$UgV{9AofAzQEyNuw&GC{vb zt3w}myc#hMZoOf+%os}z2DZ=d+*A_NlU=Klr{1Ys@XGLYi>S17B_)e27XZ%q zXZ^R%NPip9(OvZ+T}v&4kY2!NO8||rLkcSZMejij&?4jkPX$SFwL-WuNSWTIedNcg zH$|(QpKRJLPtl2989hY!p?Il$gAh*t_oQRN(EEx-jB@Y=zs{4`;;FVA96sAE;=yEj z1iwkvC$A}hB%yh~|0H9zDv=HWnD7VDWt%3e=)M?~^JaiQZKzBrM@CB1<%=Vo*z#X? 
zn4Ge#m<ech;fo{?|Z z@5<8hQ_A{C+}hJ*IfwsGeWH7A7Ob>0}#r`S5moAN-NCKfaXqZd#%N-1F8-b z;*C`J$RUmr`+)E7MzOFY{l*29A`5Sa2?)-?+wW@&Wip4Ts3wd%>H>vZDst_B*~RJ} zTDgY7-UU+1muaiaFOF{p_3bMQw7cVQVk3sX$!=0^h`dZJB~L5|noQ9%4=O~|S#-V+$KM9q_G+Hrj47NVYK0;gsH;cxxnc)OnGZ zktB73rg1@*IJ1%y(_7~K)^+{amcY_c?8eAGC$}%odlyPSc@;m|U?#2eb*I}!+Y042MS9l!t_^{@Eh20D4i|fzq$Hu z@W>;+$Q={r2&^=FC1p6UQq4%3++X*SG`DZb<6{^mfAx+O~ZL}oi;#kMO1F5iI}iLIOk{qZ10ye_uAk&7hoNSzLfzqV(TnER7kkhhhA z0@{&7VxYKMYyomeV^Cf*_=3`H47YKPXowvQxq-w3$|*=+??&c(kgV{*RLW2-r!>Ia zobt>xmPIU}^bfz9I~?Ev39(2X4|eQiq>6)y+#utLMyVv0G+m%>=3uYao8J5x3-g38 z*uZd5t`$2#>wuVuVnS(z=?cuYH0GS9tF~$CJO`*NQlIcj+$g_J4g`vFYvVNTWl8JGJ5sP|aX4Q$x4QN7m!bmTu=0OlFrgJuNnn2+MJd z`=p-tZbp|M)dWYe%SjuQWzv|lT<=z7QLxd>>eH8s=R{|*K=Ln|{-~SX3{zmd0K#>f z{ZGKI#Ex?QAziGiH!q2v4u-YA-(IiSZif)@6cBat3=4uI0T#mUSueTDjdCOXhq)=8 zFN{KiR4Ma>Z+C{Vf(SJ@Sh45TNqQP>OXcQ!jr)A71gEMF=g)M%bd2r;`CD#3PR3%t zaj-((-1h81QQ3!)i%0;+8~Nj8f>J_R!yy-|V94G2Fk)r`5jZK+3AG2BHU|Sir#>j0 z@^~-;lkaTf7%IenT63aWq=i8`0=G-{{Ln74xbaE8vTI!n(&~L#sos?#c_kmiJ|@%5 zhy-5Yh&`3e>r8?Gfc)59SDqJWQQkS!(=a9AqIi?UX*MhzEc*hB5~V=ReT7FS0KWYg zE!C962fHs=L4L)+S1bHd5aa@ZX4-1q_&L?mi1eX7EoMarLG?xUVV{XZbJZmVkbIx) z@0u}!I+6L^mY25HA%XH=H2%953JD-veaZ7%>)iMqHFp++51`j;2aUA?FrFq|KLPJm z0KyQx*)WJ^urLo2`&fj>Dc;4Ilb$>N-^9|5gG9bFH{c(%17W9Lpfv;<1VVuWy1+^b zi&g#V7rs6@o5zN_=qv1A&Vr73zuIp-^jphGa|m^jMv~~^QjozmMP5}z3_D_+Q0#~C zkB>>q#DNT?J%Ep<9gq%~!D0p@5^lkdvuSy&4GCEK>zUn|Uefq3d>?-nrP?6@n*{=v zdA1TMyc%QT1PM&&=$O7^VZCpBVsiR(bX0vqlf13;d*el4e9f&7e2?fRkI`jB<|$rPh__HE;_A1aO_%OaiBtJ|+`EtVIR6T~V{)ch#K*S`Gfp7P zx^^^~-`v8;-+nxQNdHjS7YL~gm(k0;z;3F97!7%Se6`a2%b`>t z6EjozHHEo<>0w)l4~Gl6A++wsL*-=<0-gtwP}povIJ*LM=mhSK#VRVfpFSK}`vn7x zR9)AuwhvIe&r81z)sp34P1$3=)i8F?0f-p-jV9NK9pM)+*hx(_=ac{ zmn^ANfRI>aHT>U&#Ogw?KuFA9?(A3BAGxkFxL*H>G{-!Pr6!UtO}PjhF>hYi-xg~6 z1uy&UK&o&W=f)1?Osa97bR_EZicnh8tsN%Y&kSCvHwDp#y}eNa9$D}#^Udnj`Bhh+ z$H#A#da8rI@Y$VFEl?Yga|C5As`1wPPGWA<)&>7vPVmqv9CvD#PainqguL=|lqL?u7Y`vn^!zo@W{i&heIfz8qN8)Ig>s1k&g$oqrrPU&6&4C! 
zc=1r@>rN`ub3g(QckKJ?p)s})1>(Fo)mK1saSy<9?f8YRF_>Vse=P@mWt1Q)2CK>C zmV-AF1#;CMAG&cyI+IdhGK+e5GtXIvXD>%6;DmCaV26f4DhM)R9UudKKPM6Ztu}Y$ z376#Ln=!(P-BeHrVlslN0_CG2&IPKDJ~xrI)2iDc8(8byk9q5%X2|>ugo8pvG8=ob zPo`V0W!2mg3l#qp5dtdGrtiVbn74$J0hG@AmLDXT!jLZuWUkMNyz?{67FiB9^d)!1 zgl=KH`Jv-o{dw#5RF*36elkk{Fu6e5K&ny+5E|@n`1RZ(8b^$|<~AGoeyq35vCP`$ zBQ*n6M#t?YC1#ZZB zL!eCeWY$1IpO;N!YT(C9`3aXZAe@282xLJh9RZD=0U^wtmWH*F+GfU`TT?p@VJJFg zeW(A}5k8mv%ZNH+vM@jY3AC|bI}u5&wG|nPaS%1jn_{ zRxX4AyH9FxCfkE zytVqOem}{!mh}&;h*%8!MtnWSqI5#5zp>DO-wl~2SNNSB%)7^VcIVmPFC_7qr>Etf z=ox%5DdmSbc_6>$5G?RpbNPJr*g+%%CE?lF0w&EbeRIYsg{+iYr(>KVznAu#NuTU1 z<6)b9hbwxPy)BD2BY2D;sG{{s_$!i{2*q#gyyZf;R_l^TdpEK4fGsC1#9jG!Vvel# z21GrL!nMvBU2@yDf(MBLpn5a~mRTmHN zq^snt?BcyHad zmt3$cdui0gu(poLgTZv}8xzZ=+L&=R)yN~xRE-iJ59}0_rN0IfbIL9{tdur_N?hQ= zgy=z!FY5@fMI^Ptm!Vv6f%QTHO8LgT6dHpERt1n9ic%{ba;xa4nRXl`HNJl}Bk&+7 zgCrpEt)Z@!m|4LpAS}gW^(%d-&@*0-5Or{kF2PuOiavWBLlw_hJB}fq_F$m8Cc3E_{!f6Lb%EH zX-A*eGG^sxW~21(RHg(kjYvJKIPm-A7epN9a6@%47!Xq9kYXJ9;!8_Q?=hY}q9WS_ z3*sB^i0;MrOCk_xFtHlTg&rqN*tWkU4n5h!dO3dWrQ!RhSYJbiCE zUZd1?oCx(tTKcZ{l*VG9gv;0dQKM>(QG2yvvdT+HjDH`zmnyv1<)OmvR~7xyE1S9D zC8N)KH2k7Dh46t*Q>K>xMgk#H%SWz^t3FZe)L4Vc``eYOs zgL1DzHSOPlFNtb#Yd3s6ogx-LIU;s0z-VlpFmiZiFfC0rP-U{2Gd;NLX7IK}zHz5q z?$VQum#xJCD;Hj9I}pB8jCmP#=QkODXBDSxc2TsW@97JBc*jpm=BdeZOY3Odvf)f2tCboZ4JTfJ8j6xTc~ZHS?rtCL>v<)lZIwR}tF*M?=_K>kSk6lreB zr&%_TFc=oH0`q3RRPl*^J|Nrt&$ED(Ynvw6W$pQ0BuBLS_$NvsL0M}qRM-7=#Ab;q z8OVGjZeCXcP)`X4N%Xl{C(LVZ1{W=;ink;wl^Ql zg6`lu@Xw~AU6xhXeujmYlvnLPvE&g?+gHiB`}M=sxe+Ij(18ySgOViyp(TOW&D3c5 z(Cu%Nj?t{D+fS6LQ(_tiN)|sSKI|=;GGtET{XVokYscIzx8k^duj*dNN=&hl|KjJy zwRA;pnFu$vl#JR#ZG)P^4KCO-V)j+wa6Uy9@nhrVQNNB)wkSf_$Unju*Cm6D6U)SY z^OoygJmfPYwpSv=zG*(EUpl%)doA_`U-y_dBa16aF}Q~j$Q*DStH~>g-x1oSD)oyz zE-w1@g-CSnY4L}AB=Y}MFSsZ$VCrQ6 zr*D`?&cGV&C7PuXmz{`UR$uyVqL1}LuO^e`dzh=D#BP3?*U04>YhlsZZ?I|Js7@n& z2nI_Cjwe5YYPb7BfB(+u#ofT}IkCI>yhGE=T?_XZC#%rOky-pJvKoyqv08|g7~4H_ z0+K#XOz*b0`G{;zV82!KBTQD44M*O6{N%&;Rk9L!Mvq$e+;5r-x}Q&Vcsqh679zNr 
zc4BbAkGY#7X6LR+@?t(?Kriwr4x>Ypg0IXhG66ecWb?p19n*bBQtul7K3Ld2M)O8Z z4)RF78^m7!90vP1Oe31JNk(?7$cm8l_)ie`kyuO0@Dt>6dpM54jf3J){|2`3s)J%X zeNVC2YAwhZsDUrr@~QILchH}5|68`Dk*@;{i1IGk^J`a6sy`L+BAd8|DV1 z(Lcbn((@b!#1Bz2v;00oY5jVX<=l+?fiC3}7W$NG*Hz&%?!<6lP{IS6TRYV3-Y z&pd0+tkzeN&Nv+X<8jw9_0EPMJ8@13+C*o0LDwuIPxz&m+-JjN3Un*My1o-tNX?Un zF8E)Tb~0w=8g$9mQ(vV0bICi2*0JZ30|{ay~4wWaJt) z_M#tQoU4BFCU*51iR@m82Hoi!aC%^K_c~)uBl{c8jJ=o~NMJ7FgGbnAHUO+M8QC7+ zJhLvM571Sq?o&ILtj{CrQ28Z|os%-0)Rj5}A&bRHZoU)lS+yHbqH@H5G)v zGe@)J5!8Qq$LInFyEb>f@36%o0Z1lf_0}MlIrxmF)JmM_^$fm^D1s zDD_Hmd35-vD}T_E{e^q$wamu$Zu+`Q6iOOvr@kn(J0BmN4Q zq#M4ZOXtZq?hwO|?&N-d_0JQ&q8sJ|x!rpTcD|D5Hwy8UO@G2C&kDi;tKHR7DRXtn zEL934*!%leUxn9D6J$o0z0Sac-oJOc@;&VwWs!;O&LXp)2Cx;Mhv(=p9_P;KfB$sp z$ycPEEIKHBDL>-mpZ*tA06+QA7sbRY#9(?Fkcn=3gC{yYsv6=)87T!SQte=SoG z20;%WGW=rww?0fhP5CYQ3J$o1DM4z)f457&CAMVG10^7|0rOcqdBl|KFbv*hZRU#N%=t+8`8l-lp!?tHf*7Xk?< ztZA+lhV`S1hZqeaiT?bNj(zod^F8Kg9Y&4^e_vQ73xg@yu||i znB)Gt94y7L-#f^)!@^3+!Dq3w%cB2MzSKE~UC%x1c6K&6Ih*bnT90_lus_^ICFS(_jo;hSIdk=rg&Kbohu2SXgkWS)TA|~2TbnU)V<*VF z+qYiyEAc6$qDjH$0zErZjXjGV*`>e;y3-eVF=rk?E-CwXo`p0T+a&XLT0zHmp2 zpEYo+w|e@2xAgzr(*Ji$|KBbB|DP?Ly&fiDMq>6qK{J9R+CWFcfC&<#_&qG9{#%GS zb$Lz4^<3Z|Tx)-n{QHcG;?s`K#UR|;~ym-bbVzIT|xEpiG?I~k9C$}(Em^uFM+x+=&fk~Of!HrAtG8WKig73`R?8V zp{Sh!6fjgti#wbCU zj{~bOxy^!OK*ai6rtR(pX^lR9G%#pwH%znfx#al$&>n^b)cjm0emx8|PmTLYG+Y73 zyFiYoRUSvr4z6#>C-^7pMVuH;y;DHlKn0Xun3tE_Fy-r=0`K9z6jYplpzUi4*kvdW zNh&bSMHWJjpuV`J4)sONnH~=`#o*7pfjc?d20L4r`+SbQq5Yv?5KgjM@04~=M;@Ut zDssINVcpLsJG{$y$Om2TugxiAMNL6EroM6~+bj0t6)b^E395I9RZ9(F$Hy93>CQcO4tu~@`m0j) z-@>6i`}ZEIaXdPv|3fuh0^ZPsI5*W3O5 zDWR=pfS~4o#T^6L@R?q~j}QbU|HFsYJ7vX=`IqH(?Ejd5?-a&iP%;_Jz~MJnD>=x= zr9vb4PgSu~qfgo>LjB|A^I!0Xdr|-Qoj!ag5ZM3dp??B_`=62aLnQ;hQeQ-L5lgMW zk;SC-ug-n|Rq7}~{)?-*NLnn<{*hxu(g(2gzswn^qa|w`Oa9%Q(y&aUx@f0>`Rvl+ zZS%YO5i!>&;ivTpcEyd>{I2`?{=HixtXajKJ)(!Djo+S4H7~rzgURj>dD*=c> zkmcA;QTExqe9KUJu z2w01n&W17eydm!St1OGVXNhoT_nSiiW`M+p$rh(S7d#~ggYOIo548*+zEy5w6OTNz 
z_JO?`wuxT=8ZFr?i!E-^HIG{l`A;jy9)GzA?p5w)Zt_eQF2p|a7mHJ96G7V2&il0G zA%GeZfi*%`Y9bB%xCD&9@&jnErHSD`R;V5z0jXfLXT$>QVq%Us24&TqSjj9jRh9E6 zSFq01)~DpSK)VWV%RPHcol{%P&~@dPn^01@II^(5n#KQ z7fWaUNh3&YXzvyS6r#=P#G**<{(vrXs3Bz>^z&!Tp({qit&ST3tPGj42rTH}^Z`}m^6-*L+0xk`1xR{f6 zmzZBcy9Wb3q17r4+Kbhd=Ej22;2;#!35L@BFl!VE%!ZHwT$aa#@N^kSByWVy z&#G{#<|p>WFVR07=_v#Ib`xAP@=~k0<66oku!l)00Q~&s?zHD4Na?=2)CqF>?k*MD zi!M)A1TGJGU;dI8C;`)m;D1-es40cKMQIjCk_Na5_H_hqkcB9mvg?Y-*Tqo|5tA2uaR=w)KPjGL|O=|!*HB4 zFE6K*Q()fru1(v1e1#JQ$gigzA*5~Q*hHIMoaQ9HpOe6T>19zuZBr&d!gO@3-U@f3 z?kT?7cj-b9$r2-CJ+GM4YVL0Ur*`%)F#R#)%$4>@=tR zd9T`(gt8N1{uH^H1`Q?8(Vd@!mM%!=E8Gpa)%5WZ==~Fg{2p-Atr!Z=s2>-wgz4S~ zzn{)FEUd+~IcYn*P<-As@#EY_mt0xvO5@Owhqi;em+?PSZ+y+(#8xuY+$-OeP~BOQ zq&fHM)j+gIl1^vaQkq!AhD~caT}dTJYh8_D0N(DC^@|1U+K;ZmP{0!T=qEr{L25XSxw6^vtn^jhosG61E2iL|2sjm_Zy43;{$=0osX>MYU6f_nFp_TbE~k%ZI6m@^aa^>M!+&M&n>ZH7nRh-{#cs zF4Rj_A3u2#oDz4ZTQV)4rOt38vrDPplYP;oH=>UxfmNYI$WWfhd3ZW)pI2Bc-&@o~ zG$9TR2!%Ht#OOWJ&kYJ|pHNUJ+Ln@D+J8v`=ClGK&^T*SkbDIKBVVt>SB*ed?ss_m zPutq2480mmD=p{7;l8 zc>vPvK0~SJh+*fVt^EcM>g;(*>Ii3 zr_l7-bt?$;uaDR5yKtVuHuvn>=e0SWmc^zJ&JB2%K_V-Iq} zD1>zeZG|yfd#`ick39psdC$8qD<*dp;+=azLcz)&Z9&**Ai`JZ{Bb*U4M>Zb^XFMU zz}B~$V4ELEx^nNs_?t!ia_w!Kq3G4jS6}LeckBSKHsn^-qc9|tfIHMtc+byI%;^b= z6?=FcI!|rDLgN+S0VBR3=qDj0GBQO&iYyaf?*F>rM%{JuUX}U{S2*MwI8Iz1_PSd* zXmj60`;VzC%DNFOd9QfaZl}4sJfZ_^lmb)R@WVr;Ja&oPERs6Z-=JyekiTE!ujrf& zI4}_b61a%^F?IEN@Ql9%SAO_(D};P^%dxSqtfPJi4h3Tcr0+pXi75ny2TqTDe+G`A z6ES#0Xu4T7cSO_=s{VXAyzXujynG{E7?fs(gBtgND;2$$c>tK#h$=3O?8VZ4nm4?O zT}=CVeIPMlxE!7Qp7#na(P{*s>s7S}v8W-?9C@>JK?#hG;3WM#SPSXFewQcFRA~5q zG$IAXU0{RIpoJGjPD0?L7c<{eiY3r)mLf$TEgfpU{!#c;@}_;2vb#cq#Gm@jcH2 zuTT*rM7=;O2XgfEGfZwm9xu0k{8U{sp1G}|TB-I$EVy<8Tu zCszFtauVS;eFe|bhRcALRxKpZBp*uZ!06PS>j1a_EwC1fgbPqnBYm+2Do)Q)pds!q zeNC{lLz=0lb{zf}zom;g5CB-PU*RclXGqF6%W%BNz6eMG=sEh`L3m{;X`dd*7oAUG zbARtR3M!E1Rv84$JUbHuUW33(O(X0!SvBys2&TINphx#21)DCsC@^p1$C<<&g~StW zT!!CgB&~u@wGX9!gIclt%vM?K!wgZ=?@#|4?S0EC5(v%j(I_OC&Lbv2Iz6i zwF_9x$hy0j()>m`4i7Q4FTZoycH(AxVchUzjwJUjTH}l 
ziJ>2&1sriKzz67OSKeFHGZx1?#KLE5G7&sG4Eo`o&zQ7{q!EN({+?Lw`EVx&>4iWd z`T3I}{~RC!52F4+-)D(+ySPvYx*p&?qR9Xzj1i94AiSr|``+=|dJ6}v17(S5r34o@ z^5#{Vxu~rEBhehtQTfC}NEE;(Hp2{dkU0Ny6fssGXGKJly1$%pdwu>sts8_QxZ$%f zB{`$udH3d!Nb8li3OhcbKDkKvHz4vVtK0FRDQxHt3OT1-_L27%fEyU{_WvGn>|~(c zX&9J+yy=jV_7a@>D0~w2VM1S=-dD#xmetx2#EE+Qp!~LhplRLr870qF)!Oz&D*mkMnsi*s$ zhEkj;goFmU=JO8z+mA=Yf&uXo{%oojjVREN8>RAEUHUK{12EB_> zkWtZ$GPGZHdkY+zHTVTC{iZ24dKs9U;nGyB6i_YP;iT~%qf1QS(eE*1S^SqQdk;^Z zVpfnl^5)*sY%rS~MWc3A-6D9{=D}0~v$Y>V&J4=SMDc%s+8R3w9Z;0Om zNzjpLFg5-EE=`-)91KEevIK;3ChT+g3aRRWRo4Gl? zON@OM7~wT+arNMPg|FN#{jrB&Tip5=T-vaZ z?L;5ZX|kAKkSb*p3GL<0Gw4i=_a#Jb)Y|;J#kat_ZTB1F8}{lo)v9Q^XN#X2d=jkl zptrjDn8o`YC8r<9za*+1yRLY@H%>-WV#S3eE3T_7|BaW{g`0>g>oKzF^r>|-Zq98Q zWI7K!Q@zP)PYNi_Mw4#FGnDo@=5Quy=#|ZrfG+6v;ZhQrUCv~V_E$m5W6vgN>RS@H zW^%~3DZ|@K>q|h}Wh?fTTJK^*xn0()?ry)_>?z6MNoQZOki&aRm#BP_mRHT2vEj~! z3bi8sy52&k8J*OYuO*LY6+K}Fc0sU0G~wmX7*~%*LyBx60}GscTMis(SGqh8OME>~ z(8XwitG(8ztFZPJC{2y@beK|=+sqi6^EX{1DuV<)zXf4N`(>XztSkA&~}Q%2OiH+s^%@Wu|Czj@Z*zA#?dcWwUB zXN$h9ZJ)@^&V>_AuFn<<#84;4nC7!hC@ zf11zLnEX)Yki)$>-)7=~!3;vgrqNyf$0@Ye?*d8~pY|7Cp5S7%ca43pBL#$fzUXn- zk?(+_HEo0JgkZ>mdrzCJ+qJpp=RQ;=H&rBNnp2CU428G6zh6D)La^Sr-tagpVI<; zyeO%^FqH*$M!PeeRJgun){#`Lx0>c{`LHnKv7j~IXFRWpvSjpftb%?(PWd$;OY96M z*7CGMo|dogNy1pkNq+;Mbout`6qp~Xw6RKF+WmCFn)FTi2tT7 zQtA3znvGpTQw%@%r+NRX05Ke~9|lTQ^h!@1a&Bw%+GDBe(RTJ3f6vri`>4uOco|pu ziO*lOxx}zb7Z?9H;bP<(zHy1Wp8oRRG*)!0pXra;cVPN+T2kW80p0JNI|I2dm|n_j z9&IC0W8JK%&v+~q20q3p86Q3jm`p@zrnQy|*5q)AfiOxdVjN>uU&KEnUT?P^TNXV` zt%Wd#bA-JoQgsXC#&Ob>pB4h3QWC7CI~aA{Xur5;B2{;b@xtqvq2#&76EP2?CO7Oz zS4-i`e!uEO@lrAW-EVy=%8MOep5JR8CLUR$?LI!6{m^VDrxW9-cE&4SV3DJ>^&q&`XNmK5>fT=1wb;w!i<6Bbh+7C0K@luE8s8x z@3*#pQac>>j)vkJn;`JLh z(n6ieam>uA#d)@#!;X!N*4Hafi5C`gh>ObK}+}lb^ge za+1oH8$oMqUSbmN-4!v^cFFeH=8`oKYW{b#+dCY8dNMSoJB95fyV$; zgaw2{!^0ud-Rwz99_^R&;g&(OU>BkB_g8DVJX-&K|e(Z2uN^#!?KcCgZHH+MU6)~RGB3PtN(gQ-~P68oeU7*hH3 z6i5cxDlCkOg4uy{C2i9qC}GO3pqrf!+WLr7Z*i2bbaFN)Y&-dLQm@y)G3Fz|6YIGhaR@do~xThO?EH0 
z_*|hTuFv%3s&<^?AGQbb;u>TNSD}Xp%xf4l3|B%eBIz;dldK` z8*&&+1tMtEvL@@9b?G;+yN@Rue}EKv3m_Z9oydcRjQPnDV@<`8!>te`(zh_PFt5~l zDIw}YP2|2TI9u)JBGH|O(LGYA%Y38Dw>U=hWRV)0u<{6k-e%!H4+lv=4^~|9i70d> zC*(V~+Pi#yLWOZV;V|u>5@NMCL(x;B2yp<{AW`8%$d1!Vb5&5Q#hMd}oX%B^l1Yv( zx|?9NiAMWu_v045-R~9aStb;sDu&yAiOMz|L36Zzoes3gsqLDPqNv{eQPBGUhUX`XL4I+g6tj8aYZ&=02z0{A}*PlpDUurm(M&{4BO>9ZN`SpDOP70mc zeK3RKAxxI8gz0UPFzF-^ivLQb*|$m+pj0#uJJbS{AOP2P0koDZU}_6t&*_UWe{e~k z-eMjQ3NOPsD6hp(5m1ivIFB0cT0A7sjW^>7L&0ksVXf*{mZLNbhWH`a+4~5-0w$@9Y5?HgA!8*5&AnVnsQb!yP2U z1HDg+1+(ghIB?7uHrXXIFRt1f+;Mf{%@|%mjg>`T#K_91){$e}EJ~r?rl=aVkqx$6 zDkL!gK2^X0F6^_(F=C~lqV5oR(x?zJEU4zVnF!+Cpm8|db8DQiz7^|5q+0C2jY%>x z>PQw)EJ$f&lVKgBuUt}IKUad*)JSQEng?mG$Rz;9!dE6=%ta=tZRcok#pwQ#6%^u(4Lu! zFfX7g5siiR+0i${#(k_ygVCApuH!!QX=m%@F_@sCdahD(i<(;MI?{V15-nuLp_!a> zB#qSbJAp91ldn-$2Y7Dubl^Y9}X!92j78C2Q(BB=2=Q;giD5^VWDua%kCRY z$)+YK&AlADX{I2^D`+NwcezjO$x6O(rdb#L-ol}K8W%b>Y&sg1nc`pJy%OYg$v^5Z z6G1y#)nhAV(8zi<{0j0vk`;KOhcEGWacwXTIzgbtwf<1|5tRFyYWtzTn@rbgDN27S zHKn(fPfCAu-7URQA1WV4|8*dG>ey!1GddJKs0;V6U>MD3BOZzQo zMyKBxxudmYN!@+cb34pWzoH8$+c(tJ1#0(1Pp`Pvl>(zy@2_E18Pu_4zxOhXpkJBF zdeA+eH||jjff}SPZk#I&&e^H3J?E=McbvFk=K<`xCyja$n8H96fU6;EA^afc!7)jh zOVll#jGISNYEdOEKk>^Bv|!hkkuSp`ZPvZU>dkcB$_>$0?_B6Q2gQH|Z8f^gHT5(! 
zAf)%PI;j^f;Ll}pMUP~VKn))&ou5=DNb&SPx>ow85#k$wZ6@sO1z+o z*RHLyO;@F(vqJf5M|)bHHwSEZC!F~nFMYd4U2)8AoeX2?lL*>=*)MF&x|d58Rw?}0 z#lwRRi3{*5w$Hg68^`$C&TQ5GHkvi8E$3e*)IzK09|oETIU6_5DaR}{|-iE;9Z2QI)#9tIW1 zc?i?q(lzy7K8KroTfOdbvyr8{bE>2bUYo5p6UJS&R^}$#B(p>Te^)0t8{N01BExDl zIdnhG>MA+NeorH4c{a-HnZD=PXee^`TFt)x3P_{hjE-v%-eJsvqA;3SGZ7cJRiE>q zI6gMf(ZG^Zcjcj#)GbC_-$pt}Zpk{fqg~Zl>w?t-SEoE1-O^GKA({eq%+^)I*9b?@ z@Yc#4OkV>wPy5H%$M`xLT5?u!+!MlGwNVaerf|s)?8ZEBLVS}$#De}KPB=)Tn<+G`z+ZBv{1f(>fB(y`cPD6v zw&46(pZbL>jNTb~)3yC-1|WpSODcQn_@jr*%}|KHCoM_3aCNh9 z8OH_Fmrlvb_(1QnRwF-(*Y;GR%v((1toxqQMH1tj%=0q`RQB?;k7ev`#crQI;;(V^nlTyrpPjca8~abw%p$gtuiCIEwN?{&Ax7m;UfT3y8%q~ zI&XR`xE^P1^O^$y+e@xXx<5K|WZ`Zf?J>>sTTbST{XJ|ws+!=MC?8L4kz(B33hUOl zj-=ha8OM55f@rGR`1)%Bo`xkXKwr3 zfodsYG0eMU+{Vo+7mugfe}HfYiLU9Ue5~{>g1>6>VfRtwKEaFA<_%!2!zV}ZX7!aE zwkCOl&C0|`0b!{CR(o+{YQ$gI z+fA^_n(~x(Hd_80EF`@g22b!2d~86gT>|^P-j3C(1S1Sm>Q$%}^Vg}-+g>hpt?-HB z7rZ);Mv{iq4w6aQw_!m?=ue#o;c%ndSp7BV(yvll7*(o*txyyUUj;>UH+pb@$87Jl z@6W?4A$?s04(1tZ#@kiFXs+J;_n3Kg#KP_S8evXFqY&oBUhl0{2`=M!*R%@m5mVXr zFL*#){UrRw^*{@W#OsoF4Ho&gcJ?kV{nB+Na zui@fRQIJqvCXN^4g~I;lhR_m0^YtROq?&H(H-_s}&IN>@oQjZCwKx$!-f;RF49Bdf z3>E7GZsJAcmC_Yiz)Z@|MM9CL_d+vjCn{W1&KI6aE1tF{z>Y7r-|YIUBp=Jy!-r4O z3J?NYYq1#@?V6u_@q9zGiX2q=yq2#0aLZJ4Ss40W)$B}aZ@d}A!P7rs8f1(y8>swx9HO-*3u5gb~@M~!@OD_6ak%=W!3lW^cCza znu@<+uKi#){au>c2r>{D5N(>$kDHC#dGM8x6VvTqi% zVa7TI0hK9n>e)I=g{&;O_Nw@x08Q3w@C8s`I{*@M%kDkNkW!}ZzA)2egJa)+KNS0d zuw0C3({DmNsT7SOp}&O-wx39WL-E;Y$=?nPD??i}TsZH`AolD{m-HfO7Rmc@N8e+w zS*#!W_%*wWhM5ROf%#BU3DN?AZ30GiGt3VQh4H3_ki)(Wa9rvqo~NI61<*N?s!Kx~ zYL1!gB9c?EvqNh!L1ap;NZ{1w+|9%ooPeSiRt|- zus7H9L`VBqilIa{XUxL5@xnHQ9`A#7aIoC&)%Wd8#InJRj@%U8%tI*RmGJoq<(<>- zPtP=Auj-a2*Zkjr{V}YbfP-N@Rx|GBfS+FmCDNlD-rFYO;a);vZLv^`>IHE(j7p(Iws(D^2pS2$ zkMF84SWUBaq>gm9u7bnXEN`&ElbfoN!2THaxm2hFMW4JkTW!n8!-~Me`0LiOd+o^r zb$@(mDOn+kzv+?TR0l$mz!7y#cOeoYA=>5Gg98(~h+@ifFCvi)P%QsdUn>WSH(Hek zq{vUs0Q26|!J6s*SAtDwMb%BY4XjhI#~Jd@hA06&)02LorxK;T?{qLdHG$E}z1JCpE=Is4MF 
zx-l$)lA7L~3&eg)sck;gr$O8TvVr#nx)vAZNdg>RzpT5d@GQZ7XUpY5M2l^JZg`hRG2*Z`w7HV*^c(RGlF9(fZR>J z9%7|N!JaN(QjJglVkM)!GP_fUtK|^hw(<4EUirhJNk&P-MN@TX`;izaUcDAxHt9fa z1KB_>vQ5H~1l9>xS58(ikQJ9E4HJMB)UdZtuZBp&8l?rI9!y9+f91`u(x#<%Qa!%H zD7;`8Pg()Co^=6hY4L{GJwMJ5y8;hecBds*i!#2aLAemV`K$~kzOAHXVd7gvg<2$k zoeI4zseZ-Y1%y?^IB;VX%;%h8{YA^z%_V0I@5wsm+8!0%M!5<4zM{N=3G zB(@RycrETW|5?B;DG?W6mO175w|ZAO0PVJ0R%CBNhee(RP=U4`?GG+0dw6)1OdOQS zzW)6b0cH*M!3>>{k5A-9<{vF(W3R9XH`Eyk3bp?SN~l3@fp*DC6G)zrUcLXRLh7ON zj6k!LrMTARB+4c=*tx}oc@Y~X$U3k!BdW+(?VJSNZDEXLe`R-jF!j-77z6Z4^*wZn z>WL$1iW3{xXCJuxyg1)^6_18Olz{$>(_?o(IE|Uu zTr$*=G!X7|^$)kPC!c%usu!=i*U|q2S!Ii$VQ$gn+?tqZYr97her*)3x2N!mlVo2y zMT_-a#aG%4U|-uhMvg9`J3O?a_D=Zl%FSknS-SqN;g)`NzD~2>_-v+Qv`Ej4e)H_@ zi#9lT`*u~2&TOo8)eyew7%oGQ)2Gz>@`@ieomYjlbCM&+;IS?ChD_dx`agTbJM<$hXg4z^J+fCH)Xb3 z51l9mA2{676B>*1rP~FVErOrXrDA*;)DO;fR0` zor+4=uG*O|_^)BDSrxB_cbJLbqGycWInnbUPXg(<1xFaPf*ZWTEnTFkG(iXdV4qdj zix_i@Jdw2Xnr1bRlWznmv8on+7P$7^@;j6yVe?HIESVnMcA#aVg7b>DXQ zgvB_Xt@;$4p2eU{mIpYORcC2N^M7=IXD`+UeDI`jv1>_vQNCQe03UN&1Eluf;nV9y zh}QjEXltT!p(@aDGwx%mC`5alEo&o0`_gR;fky%YxR>enpr5=gs6Z)7H-6f+U8iYl z`^1VEbY_8X+2PRLL?~g?wMf6on#3d!hqtO=dRBwzy+u9CHvn$uQ}S@&uUe!-2xMLM z7yb#dlq%H5W!sjrYS==>PLf8?2*vQSzoRJ}%CWasJw#p{9DCdS{T$0p>7tyuDtS}uoa!PDOdKp?kB*$Wkc6nbyiUIMdjFY{ofSJD0J5iR2%^#IgJIVd|IL zEUjzZx$~Q03U3$)Gv`3>DCtZ`aBW7r`wsSXW;Q;U7$Nr&2@7rfb!E2s=h&`ZvGY}I zGsV6yGbmsd@DU~V9Ghbxx2tSX4+|;=G`l**vBLqgi!{u9i<{SmM0rUFEiHm#?bs@% z*()Gd}1xEd>$&zWx4&fCi>=KtA;B}Fgvs5sHA8F(KcJW6B zOb%Jw0EA$N@-PO5t33xjz^<5S$^2;Vz6AoY3pzX&=D0kTo=tW^)#%#B2N=#f$;;?2 zbRuC$`~pH!rg?BnFTmd-W#}IT54CaDqnxcP$d`aRjT4UW`JVM9=sm9jXV^Am{KaoH z5zt4ak+2kJm951VTt(HfMK;{REYC>}K!vJ;6b9I&G=72#&g@MDy~i{Qc*+T^8#;V& zkm3lGpdq}A$ngG5Euaa%(nO&R`)DyxDu8IO01LxUqCKb7mVO>keX6D|VCj{IbTF`i zOd0V<#Ht-HSX|BvSKoq*y2q;a@M4pBg-!|Ou=B%&M@eq{RIF`Vj{1zYXKKpIkDlFk z(awf=ovbi8{Rx*Q?dth21qRf-E8P6)TRc(5V~K6IZ;Ul=br zret}hstu!yfn!15fpy70Z`i3V5{!C65?=m}Qd9xx_8ac#r2MH0>N9h1DZOtxY;uA-X3FU9*s|}2GA|I8QM>qZ 
zx?}Y`s`okl4qCmNKD3(5c>=>fu{?u|4BXJ&X_x3ltnm(G$e|D|1k)mc8l)BvuX3Jk zt1$BTAgvUJ)C5i?Cqg@jK5}GWMj}-C0ASzO*gyBNYt|p87hWV8UyQtZ1TKy1Ul5R5 zW54%s`Tq(6Il$n9mmYJ|%_WlO=@9Q91T*r^!=QCQpO&trtJ}iEu>$X%3h??{a2a=7 z(@wMD1G%I4tXxvVjKFwOpCY+N)*R6Fg zLXR4qJXtfw>HD!AJ^RG|=T5=a*lv{J)0$?$mKVg`$#3;1!YD8EY2(i*AH+WQ7iL}R z&Do-fG~o4CEk1Z)J%CsTL^aj}pe++s_19ozS|zylcL2OQ>-1|BPfX zaxRe{gE7{NwW+Uf5$NU8kg&QZeUDY0P-_t(TV}fCBE%3gyrhVmMpoI67+gaR=xi-_(X^r%U;pb9=PMVijV4kS^e2+#G{TlaPkLXG$ zxqhfC;p`3k;lm&X#?r?dCdrMh(eY5C$zX~gqsCQ?Sutvq5UqpP+~9GzfnRka%kZU) zyjc@*R7+Y%#>(!Eb`<+!mR+o+A%;u9WqUO6pB?R( z)FQehf%T7Pnlf#xWp;@YPMEI)MgXg4g{5haTHAaA+yAqujf6U?kx{E_G!eP?LhbPz zI#M1dB_nrh=tvq$c9IV;a!Gb!Y0*~++@vf1JKhF6jV0+o;ad%L8OZ+FLWlV8%5f(2 zw}olB4V;#CC(3HD&(QK?&by5_B5fJ@(PnEGW&?<@LxdfeXi>2%nR8?gmdp!ELnMFSyacE78`#cgMx_?u_J@lAs_6XhN*m96Ep62uc zs>vN-Gbp;hNg=Phzi6t*FAH`MW{p$vmW2`A!e%JpgHRilUD3$<((=y(D`GxA_%2~j z@GF{SR5742r1GR%E~VP;4xRmedh3g#@wi7fG>XH#{0U+P3_^}WSHknqfEBmPUryTb z0Mw|PY`WRHmb9%WNU9N_f8l&+l$y&GJSwroqG+2}$7O+8UY9(O$=aVFY z^U#b4{=iuzTkriE>z2=qfOqxXry{A**|;?|B}ry9MXh&}xsuVt;2nQFGK()Cr93=3 zKSR(N0#Fm)DW^46gitjq-y)rR?lMqKLYXX{`~Wo3W)SNWkS7K8crojI@LOTi^e%#V zb>Y#`g6KTrCVpX)h9;VQ(9qil2c(Wt+(vo3$ZF_)I(!&KU%H4Nt$kZ3e#@WmKk#vn zbvW#JaR87%r)l5Gwa9DvyXr&h#)_tlo32iHYh6=IRH30qf{X)!&o#m;`|*?e_RS8z zwP;K_%K#!>7rkj{zvuu9$~ExeQ~~SOS3CcsmO2`Hq-6uwR(4M^?QLFefl?yf!V!fff>q`fIT#4Mm2yz1yIH1xU(m zK4FP)qJqC+6|i=2YeNIrKVFCGBF9^Vm>I!cJiLWd19O4Oh}TT$k4fr}INB|57nelL z#9uY8y7c05KqU`Y??6eXKEur=Do(VROFRD1 zeYDTv1<bfR<$(=ufkoDsKODOvaU6!$)92A^Ssp zT%9a>OmCEB3^Uc7y4>!;6a8V!otEpQ4Wz{;|v0WCPq>s12XcG&2%le6Z#dp zc96;BYj3&aD~7w@_CaiZufisO#(tRt7(d7@$ar%~;8R8m>5|RbgZRLAZ*AA}xU5&+ zCeA8Zx%|jao*yxyvZ*UpJMzRi#g(f3yej{2xT;>$8_Z0&Dj2O7R4(XhJD5H{@6Ho+ z8J}nQQ9xtEvm^dcI}{#9G7Pg%%3}w3zDSDjG|5Qh|W=FUYiP%3z zL%74*fU2gPIQ>kNaZ|r~V!&oeMion%(a&sSU92{k3Ea4*4&sy2kz&6j)%lfE_`6e=@!Q0W23~z<&Ue zOJA@0CrOU=Q3(A0gphxJ)XHcUCgla_30nF$^St(#OO<2(yPXfaGwMYY2wedop6w6g zpl|9X-dn#gZnnrEo3;&TUD=%=Ji7b?>_fjSHu@*#yOKRzNEo$59P0D*bgdcfFA@E5 
z|6*?23Nq29oX;#}Ei(|4w-Mdv@89}Bx5B5BpTb&O@A?qhM}|2Q_wG@Ft|{-0jxDCr z*S~3_JsNaB=Ru(2X1c;*Ypg_5ah>mcJ98G>^`5qY|5FGjS_$xR((YVsL1Ma(Em-Ts zs~wjil;n6GQyfp~Fo3)!y&^m%ZwIKvZmDoDOS-*E89%mImnSUR+ag$md?Z-*8TA3~ zXWU2pj4PlS2N0*n6D5a=%{~>-sngfBFxM6O;PHEV(r4vDe zGuu`QG}-;@SgMN~9HS0&&45;~TId?8fE`5=c`zg)?b-FMk`q{hDq$p*c6U%*upVZ% zS=8?1EhHADxu>$9gm6)J9aQ4$)zR`ec(e*SHw>w!*x_=!^+68ovsq`O0yJ5Sg^2(H zq#|a{-J{ToAB>dUaL`%g<{TiLJXF?3Ux{bAXyw1cLHK!M|7$b|6|NdIw4AO|%C~Km zh9Sv@JNF7-mrlmi4ZbSuNH`Kf8#VA5G!GJb?nb|$4%tTe^HR#>YaWeU~ z_g~=J|AkN zD_Hp=bHlPHMH0(08@zW00iF}BF`%=Lr75xA(Yz~k%>-{mn8E44paY}Pg!3sb$v;XK z<`KpM*WYI?B?*XdDJ}v|Cam4Q#E>%bz^MWqMt#pz6&RQ`XMpg%GoD}2H6QFzrg?)+ zo|maA`Rx3QirK$7r_xkdm*EJPk?(B*mjPW6qR3551m!&xnRsFT4;Qr3AeS)|CsDwz z9S6(L#zzWHX)7TuhG|uASF{S%N&3+)`NiJImt(@2-Fbwou z(L2xOLi~s=i8p_IGZc{BzbI64b&#@8zTF8-l&~=5;ygq)$1o{l*19=ocq4Qcxsm&j z<_}Ge2Qj}pw1rQYKK&jSi7I~Nt z)U+d5bxSIlQ7D84KZBc(78H7}G2Bi&@@!HFf+sXy3%84ffT8B6mjSmG*{m;DDu}#m+%;a(l ztApPU+ry!WP(4{?BQ(L=LKo}PEy*>|e|Y%t1?Z2gRAY#Ij((I12F^&i=|md-s|6+) zb@%H2cfn|#d#8--i zS}7HY#hcDoPsH=nA>_KDS-iAo-%nNK(zHZzOveo*7I~yi9I9LTi~|}u{=0Rw!;2w} zb+CQ^mefzC2jE8T4O-;Y&0Ny0^64*LKhimKsh>PyYkz?{C{-9iQ1m4@ON&1lsg)30La3-pgl>EE^&8lm*J=4{L6htj`gD)*?_L# z5b8s_bl+<>?hu=FvJQy0l7%ono#fTvJg7+f?eBjZJ{+rt`Oj!9J~T0`56a6g6vvIQ$m#G+r~*KJBlwZKS{10$ z2S7r6Ypkpj1zSpL_X=Zh2}{jZl%B#|Z(M%aWL~o05m-um|5DS-vFeU94;Thq6KuECd$-=e7CVl%w9Tayd^- zOkIjoq~3lm7jK%$>iiM9>(eR;Mp6DAuq5fR%u@WkuJRiGMHL(mog-+%hv2q4VR-J7Z=3XrCATWj;W{oHXVh zArMc$FIU9Sh*dKyBl?>CA8KiDC)Q5i$8e;lp~90kk@0FNbcfBganx5t?sK5- ziD4d3!r?M4s=|JIkfci$G*R`0c@*m ztTGPayjZ<6^?gWp@Kdr*Bj|0HP-wn*Kd^1X;O=1w@dJ?0>$~WfNhcq`;l;2zL zzG-|s`D@{%%?yQMYaj3U)gYRtu2}0DGy%?k@xCn|fr`;#n7*VZ<2|hMN7IvZ6>8Uv zxTV_6zkxp7rAsZnb@vLVGFx;ar$>$b{YYJ6HH67{-ZHw-|GMd*(!I@H{|$ ztI3tlDuyciezx;e` zG|^E}K4*eXWxhNo>>oqy0|~3@bb5*+Eh+4|h_OXu8i@Ojc!uIg>Z6^bZ`R<)BlllD zH%^fdhG$tbu{FA%?7L8`1$FDaYF8RjcVK2GCZlHs@(god-p4qsQz>q_*(RT9wmx5)3yB)^Z;7hYqHvQQ(gg zuvQLmq=(&XUBv^+P(YcN&r|2Nb!Pt+6#ZOx0BPI!q1r&QJz}#|yjRxaSH{Nu7ki%k 
zgp>Vd0dSmw!UU@;`7f}?TuEPmMz>goFpgL#lmlzkS= z&|(;$6z)jpO|Abeo9uAa&NpX2QY?2b*VC(7G*buhr^2O#=C&F^#m>4E<G&i3 z)sFENSM$PC2No~S^Vg=K&a-g?Gq_>ssR1nb79KhDvRQop^c-%46l{Nt>B01>EQ>1e zgiSO*+!i&{8J74U7l|?TK$xJ;YcZWazd@h+Y!m^Q{a3MjldAjik9%_|2#kU^@S9zE ziP{>V$T<4h1%(YpLVl$5HR@45IopVhAZ#%ZH1V2W1#u*J=puUPTqMt9{aPfM`GsOQ zjs#U`Q>_@|6b){~e#8Af>^E$wc63?qk2ROn>-~f#rDm)7GmBRrrXK?2ngs^&chk1% zU}_`1QkcA}b~f6>4d|{Q)H;v6f_!_g#9|60(x9_0eE6M-I_xlh5c*2n-6+4VKQ9EW zZx2zsU(Th^RA>bV0souUcCBb}Na8q=as$+WDsb{jp)Mw5lEMi1boX&FtU&?{Ym#G= z0Nh7(OPtPuXZbGcsB6rK=7y&TFkZyOYaIJkzT=eCTRJe%C7ayFKMcK^U_M-O6Fia9 zmn7KVNJ;@Mn|tuK4n9j5m)LKS*ZkC}z8Y(Zp2_zRL)Oo}ex-#%;v}gn6h=#b0FWnw zjFF0P>LNpn?dU#aCq_jXGKFd>&E+6p8_KIfLhs6rd$lb;KHdh?PussbOixc!RdCaR zPzSU6@WEi+0RaRIc%d*$BEMwG(XY_JEHQK9{JujVTUm) zy9(e_@%Uu%bYnVzNa#J$d$h~6v?&0Gtlvnzdv#-w`d+}k$^$en4y z>NcVx<wpPJ=dB8)+Vt zf(@Q0sBe#O72O1HL*Kt?nEsBIA0Qt;0eFa)h@W$(P2jRlEBSx2RoDM5Mzn5~ypFiU+fMnA~&w1d0X}4GNJd(D! zhc;Nd!aiOo-&?13bZ;ab_wIce_ zV2aWFQxtuWa{kr%SFWy98>dcjAY~bXj&b~_y|wP?=O2rzo4j;3iks&ot6y{6%^vo% zOR{u{k1U(qDCHQQQPli0Av9nTBB`wXg~g-6_OYVhyJmE@JLkQ{vQN4OThOpy;-Ep- zL%uzhSc{~vFbesI1l#Mi7E_-OuziJ25g_Rx5N1`QUIJjK9D~63R0WX-+l7ToK((@H zHq2kpzXrK?m579_QJ{ywr{yT-4bxBB|A(yHR$}=bU+DZ`Vl&n8V=-n3Ea)asq~@y) z|3@&$z~Il!kmrppu5|*F`Ps2g{l4lwZ=U&FAnet6=ux{MDYmyKR8m1&50vH%RpV}3=eqMnlCB!bq$(@<8 zJK%rN5#Zap!|U%Q6z%A>0nYid($8O3uVMP0W^?&wXdv4bR5qFpLY^8Zn@3u&mT&5V zA+9acC_U8|Ya--#AF7M97au4F%B`KKnO&Q_uo1Ov_*K~w?r+$0s%GZXHfif08wi9( z{=)fB$rosk@lLWPxBDz=6)V*CIo9DlO=8?8g7(zxRY`j_u#={8(8oI8%ST9j+Z~aU zhI`F*#u5zAo@9HK!`N*)yw_#D7p%F(2%a7QYW$GGGa*no+9Yx^%H!+UH?Y9@aa%Nb zrlEeaq;H0d`uYC|=2^{0rZ+8E=Cxm-%??6$O(>Ee(HU>dtULz??y7oxJWa;s7cFN- z|F`;Q$%>ILbW%F%rn&hnzAvaP$*37EG9ItgV4Rb8JLX6+k1rA&xiO=Y5|r-oMQd_< zWl|=+g$%n!M7Ijn+?aF*N#ndV#-b%I^XKa6C*Gy(b}68z=R&;AxS@d3lguP|o6UW; zcX>AYJ}2sapS#J%!8=Xd!zyS|^u^4sy12F*-J8E$x*~dL50Jk1O!8(Dmu{kNvEzD& zOvx`;0_b6qWHvOes0IcU$i|kV-j#9a*w@{^pbrk~qCrFYL+B2!M3E4{63)w&DA~54 z4o%Qs1d88-8UwK-@WQu#b$IM%G}jNk?E}tas2Cc0DVy#=H%9=!PK4gnvYeb5v;jf~ 
z#UjUyn$LV1S?%#O8e&4z(j@tS?m;mFZ9cav{vSNY>gZUEq7xGk&mnEoBwjc@keolW z$CZ{F4&g?|(X;l2(D9%LQ{?Y&*ljk*_?ku<+DW428GNqg#C1Ukz#0$86%Q#dCEtba zyesro1YNSzdI6@d5A^xvXkw(fo-S%hOi97|{IW7+uTVq$*@slj=5;$;~1%dq_?_KHwt}Py1k`8IGW1GD99RU~{`* z%A^?bRj6$Q$;tf2zeuLy;HPrm3K<^@!f02Uh!c|b!!G@=%BswzzGr-v3WpEN*;B-k z>3nw5V<8`j^eFnL62c+ZEd2OeqDfZ?Ew3HuIS8nNU=bL2NclQxGdJv`6oN!MsMan( zZR#}-owUjUO>+U31x`LPSfe3R)b3AH&i51c4-u%Pj7CpVr)O*3h8(VPS9t!@;UnDR zfDwQ1p1JmxzcBji*|TwO#o1ih`}ZH8Z+lMvoD~ufU!495HQ7rOPc`%=J)A7|nC_G> z0NM?}BPSDx1DPk2AarjzC+PYBI=0_I|2FbUR2FtaownVcP-SodrcsBXf*X3WEL#Cc z^r7&O**5HO$~ugWL`b!IG+*=_naJ%uP80rHu9cFZgbs3$KNCC``lo|j!{2;Wn|yP> ze#Ig+`0*?GIWUpyHC>b8YVH&_2amgH{{E4;mE8RDqABL5wun};iuOPmxQIzE?0uks zwYne{$$KT=r!j}GwJ}n3=W>M$4T&c)D+xrJA3jX2SHp#_(cF8G9Rd&_#5$<^t&ja& zB)~^L$N)w0R+yUsq;Ox~|HIyU2jtwgfBaXXtV%{DEoDSY6lq+A20}%e+J&?f+KZ%= z77e7*BJH6{_s8${$9;3f=lYEEbDqa>obUJR zh*r%|Lar8zrP(i7lDY4~i|(Z~=f=NOEk*=~liCW;g;*@JlnV)gRP3R`yErcifr$*L zq@B|I%BkC}mH%-0{@k3!HX*wM$sU+mv=92EU+JigV0YbAPj!cu9v5zQTo7Q?rNWC%5Y*RGIWOyH z8^DFcMt$k}Z3re|FEVi;ws_GNZC1mY0Mk3lVUPz5s9B=eMhk?|oM6}rnwXw01?P_M zn{-Q*#egwV72O@U-9-^Hp;+h%ltGNg0bcUr#p$)*sp!MLShP{A%zVgP_A(tkI}f&2 z$!XrPk+x}Y{jH{mat)24!cVst2_FTS!ws7ypzhPnI!_pz(6l4Xx&Tic*dn&5&5YNZ z4x2^6I|*tdtiB@-QnVs?$RK0dcW{`an`H=$F&Wc%-?_{|N2!?cxTd5lrPP@t zj=pw=pne2xURrq}GN3Qdcr3GK&E#`wzYH*a80yY{8Ywc7t&}#i0&#ILv{8fIqF*{l zGEC5BDRPUUB++D<{+c3TY`Z#2;Z9bLr*oMISLQ5wu)7-7f3-uS8eY)p^weEBx&4~D zQI3_!6pHfKp%dH=k8)=MZKuf$ojw;%JvQvzV5b_`e!zTqp2x_WBYo$&n*Oti$uNdt zuyRx5qX6rnH!eIB;VXw%7-_iBJ`LtR7LCu!K|`n5%dG&Hr>GmW1LfPE=o)<46`3Ks zhz7y-{^^`6Gt&{N=A)W@Lqk4J43@0*8q_AmcIgp_YLRL-&N$EdYh+34LOT1{heo~B zfTv4K@BHeEZpmL_=899@9r~`IWG`ph>3bgT0;M4A(jNR2QFuNaZAU;w)p5Y3(mckR z&aO&TMo+f<#&81?D7Kw^JPVSu&6VS-VHprnvdvzd1c@R9CH?a-Bb@^wuR?Rq-s#Vs z@#grivr5z(f^eRJUo3*AZ3BJg85M(WUhP|Q3ZJJ1Nf}o4Trq+exmX;qzZK|o+04iT zr4%S2lH>BQ@=A=l^2~ZD)?YG)C@ngDXGN$>+iu2yvn-4WS9^#LA847>%Z@|TX$HFm zV+hSMnL*S7xExV+$gn&*LJNz~6^h?-^YN39dpHpLMzlWT^`0C`d1Yc#-5wU;R%C++ 
z(3Y%G3_qcVR1|_AmjN%O2_kon%8VUWw`I3T&G0+`HQsQaSvyp#ku#*s%&NO%RzMWq zo220fiA(qRhRnni=L{p1pcV?Mxeq0U30ufLltTNhX)N@fNTy9>Ov0U8EK3CyG|&mD zDt9K<2p+kVqW|Q~Qy$C(tr)NVzTRV;rPV-hD)b!30&DH%nx)4V^n^Qk;wM7SC0+Xh zrRPJ8ipU`X+P?956(AxqA~~i}SmtmHp*(uh3Y$B&3X7a&&|d3R+regG zol2^iG}pc*NCsZf2kk5Q6;is;wOxgaO2KBo9pB?BIuKMqGuf(}vq04@ERn z*v*{S4&6sE#E(vicTCn&f85e(K$C#$bW$ryF6KO5iRX8I1@xHtzS=d$+#ZC-Y(Qq& zKmzGJG6T(cQC<)4M>Qd^?PTsG2c=-5Ck2dyd9TGH@ieniQ1uHof4K*xnn$Dz=IZ`x zNN)$#h340~1BHhTn!`*VT#cnCcu%~25JIX^_#Dk&s=D|gB;>P~WB-WI3&PUg;&?#b zZ2&9ys=%w*4sLq$ESWKc%zi$|pa+VP(mZ{Mi?C_B#f}5Sg`%QUVET<=92gp^6H?f9 z?e~NxdoUM#EeX4|OuPgm9$Bub@V6hVNK*nv?(zN2xjM9~62eKD>(dt44*cYG_ zDUstyV2e2y6*fW_19& zIQhOGr6@lm?$iM<0a;f9?UpQHRIlO!){O@DS~mr?wIOD~r&t&j!CCr)yo2ykN5^~+dlj&th@(5>MGiEXzOEtYmdK(uy-Yq!Z(~*Pdcn{5cTlZh> zCO*vFa_d?)=~(auViE3N3q-Jkjr>q?BY_7-{F?$B&NB!qpr8Z6C>{)Ps0bRo9w|Y_ zfVT-doM2ykzB_JukLX>iEvx|zFbHC;VF<7LjeY~?>46>dSKg52|3Hr1F0z-3e}a$Ph>ghXJZ_?oL>M2qc3PBFAa_u7&# zpvOOIY?N~j{Lq*f1IZ++IWf^I>^fAXIp1zFOd`<-3d}X!RSH@xKe}hCh!JMaApA{l zu$e&}97W{PM}JU%90q$6xP{?CEJd_pqkW?iY$nKTO=>kI>t$vf1u%@19#V{Mturju z2cZ=JMe?GEgn-RpROe!_ z>?oJi_W%%c54X1ztV1<|? 
zWY;nLCp#1ld=VUB8@q{_%S_8@af73;G018ppmSMQfTE2O_z8rDdTtGYP!whg0x@`t zAw?7-1i|9my+OF7bCk?Ddxw_y_hA?=KKEC&PMF&5$F47*$Rcrp?W|-F9tj9V1H0?y zR^YvGEdV7GmU^QHGXJh*)_&&Avc^kn9ttMO(EBEvRhywy7Dz{{wq|V^=;~u)h*Zi< zt$1<0x&-}1o;ZFCDuR{`s$`AF@PpWE4e?&QJn<~5(mS*z-zJ>Yg}Yn}&MyPnHj(~~ z@j_e_^?TeSkk6Lmm5G}ykYjTq_DAF+7SjC0??_w}`9stYYZ!Pnt{A{6x0~EUqRXa$ zl>pB~0d^j!zS=!!b+ZOT)%T3YYv>Nnim1wcLz-+!q8CvbA~1B**}sWPL2lcvqnqLf zTU>Cssw=R0^aS3TY~dK|)c{P%&-ubAghY`@hK-~KLPA0Z)w6L>s1qrz^asNO+A=%{ zCBf}SO~TMt;u!48$zIQ^3c;9`iT!u6(~tub{t8(I;aTkEe!uq%ry}AP4tk>6H_Jrr zfT^@Hvs9Q_2xW_Xa~!|eCsm5DDr|DEGjV&FF@rc1Jhed;e$b$q=O~;(4}yRo(8mcH9Q2BiF5lBX3 zu(wzOW&Mv6m6br7Miim#yw|)DbmXAf*Go^17zGLfei+!?B2Rerd;upO`_w^VcQh8; z$wf~gC#O=v)0GW*X5JcRMqK|bz;^>U?Ju8S&F|Nj>P!Hj2i&FJc*|kV`-N6pf=R3s zpQ2q%564Pj)K380|3zCCjR{)HOS7;E8U+x`rb=5m3@P6Q(yZ4gU`|9;h0%f?k^-u- zrN0V*aMGnrG^^9nLG$>5giL9+2wdrbeu!DN z2F8ezDo_hMVau>{H`+2J-w<>Zp`8*z>kgOxU}8dlIUo?Br<|Q(Q|!=1Mq1|a|6Z_k z6lqmOcNsTb5Y>kMV+XBgt}CXQ#-zh`#zh8N=HE$Y?I8pI-99B^S8>1+zLWW*zery? z3Q>D@RJ9{%#U?7$p#zm>Pc2Ua3!>c%_%Dklg9(KQTk1VC3$JLRX8w8vNC7$Nkr-(G z5tiq{rJAk)QS6TTsu7GrAM9D2M*q0KGUHd^`Thj$n1t!)`oyP{utzragQ|9Sed`b)?%FMif?A|i$jw3BXI3K%ITd80nK+ZY+c{@!6!INMcb zB_#Ga!_itBngU22V)UI`0?zRx*tRHvUB%ZFYtG)iA;N~poTH0j1<1EJIT?8=vQhUF z4s`93wU`)lw{1GMHG+Vhg%@}$Gt{&p_mv36 zl4VH7yCl*>+S0`60N%M$vswXi5)`Fx%Z1HBiH5V^5%>iII=n5TLiMd*Y;K06+e(Wg zyW+HWI+U(ny5aZN1tq;3L_4Cd)Y>)i(G#{H#{?7HhXl~|=Q?w<^dN43Xo0&I@`0l5 zI@q?mAy1ql8-UVEtKzl!uuD~E&(WPLM}~VBsk(M-;dNA_D=lk5&Ozmkg2kJ3n1K7p zw8OID9GU_0t{6YzU<=fCKB%BFRpw&t?X=hWaMj?tpucZY&i-YSviR0UBlRzffp0^p>FWf+uff5_`7 zp@I=F1Z35~Aj{pGht~x`;4DB!_vx#yd@{Sx?EWazek+es{g}X;S7&vv6ObWa5SvA2 zVqMT}alBmm$N)e~cq?s$haKjjeFJ5(jgbXZxuNPBAk6e6-)?VZ!#i^zhM-#Ka<3;> zM7uw0$W-P%ke!|qYzGZp>onkRMaNnzle{U|GSns(ZaY?zJms(k5x=kxYUhBOXMn7~ z?d$Xr!;h>;4MWKyeo{?sY46f?6B@Oo_f7664#gi3jM-Q&*!&h`!X<@w-*H0BBy^afEbJhy{;fdztw}%vg>5;yA=jJON7xZ>s&GHgR zQzBHAqX1nj3FzW5MeC$@G~F2V*B02t9E=H2J&vsWt)Vj2bje28($jNWqmYwsT6^g-%L}o5cGecCAAgi4J0#w*H6l?E+O$CQZRn<%GQT+Tp%Roa 
zZ~9N1ILC1BGNs_2?P6DvF`u%`l#%zjgLPgEQTCB?M<(06eJ=6Po@B@0lMA=S@e+$y zb~dA<%Gzg!`Ov1_gFgDbYSs(BXM)tk$(Sf$4LI&*`7!MT<;S#fncVfT1kebY$DaWO zDyT;Q#cMDVUx7uRAyt42Ta?~PGORf`zA5@NVm7JWsf0Myz zEx%d!i~I$CWYAl^lD>>3Sldun0b4*E0(o|RgOo=ed?;iALyodLe}63bq$7^4leFT;$HKai~3O#drgoZ6VFfo?JnD#7~0l0v8leCc$f2q&NfrlSD$tdM+ zK@B6+W&jw9iLO>ULx}kNMlp@UX=^RceYGbd*7g{+Dlwz3@LMxLCAK)(>Nu_gpZLn5 zB2egdxNBsn$(upUCJ@-G+$9MNA9tR8J!2+s9} zqNoc4J)tYeXa#w*Yw6XG+HH14x!3Xc|US_0~2rDiP|peC6H~sKY=&bdVu;xG}DTCe(M*pMtU76 zd;ueZ61B^(yr}djQo>4lf0!Yk^1PaIeYu)+R zF34WpmwMX0>_Z>Y;CbqI=cVvmnqJa&`E>~1mmu63@Y}L_8@dFCWe*qjV?zU|V+j&rEHx}MRe{3Zc3pzTm%jt6P5>By!w_Tm6s&eOrR6RZT$^3W)&-OL)^`kLUg{xK&yS5*tYZ@ zhbC1rcmqh%vCP*})$+)zdstn@>haJ!UiBS-haqAThWh$%fS7AG(ZiwjvJa01D-+LJ z45@(6U4}mn&ezWawg+xEVB0mN7l!K_u#F;p01~X!k8xw8i6fq}xdWXiB8Zd)xmMqg z|8UO)Lt@fr=!KHtcD%9G2_@10gM&$M8OFQ7eQ?JRi8A*h$NykcZOzqBy-1vbu$8#p zNXu6_eq=JM!i~qD!Ev)0>C7E67L8U_uqonaYM(nP?a~Y zijz<{z7JdyBB6=c7&zSZ#gY}Or9FdX0~|S9KUFc87H+QawN!KmC_$gxtFjdcVJgJ$ zkdO_xP^*Ob@se8zM0O@9PK$T|p;I|o(;uX5#6=DR^55v|mdFk0!i%{95&XR&{;s2x zNV6Hn>G~^j64%ZH+)`^1%At2)M{ ze%0S_FmN+E8t4`vTk`jkG1O7Ti}n-%7i!`y7xy2Ih5lw3Zygq;D=T;m2?p3t|1T7& zJ7vBVsbdr}Y%h|)cWodmMZm@%H51)3T zf-|zOVWhtm``W6AyFFL~B4m0?`cUL*{mt|y3mv*&;RKcfBfB zyy~R0Q2DlJi1q6Eq4SdUk8#+(90!s~GiF5C-^iom8s)Lr)*EFdl-&{0pD zAx_QMMn34j8XO7M{Mh;EpWx`<*k1o9I70rNe}bcbf}?+eBNWyBM?3ic1V_l*|4(rA zPjK|DW#m7>(f@VjD53xmg8S5##{6_Z?p;$`%K~N^PcRa^UP%9?*}2jJ8WQz{Jv#Rg zd7x2y1>_$_tWLK{`P&C2zpoQrbsT_xTm<44QZs_67<{za>X^`J1qbNq3&0pmis@Q^ z(!m;cHSE!ETg5~a{FPYDRp{!BwXgv95;ChJ;H5kGvfqO3RC}zu7?cAa^D5siPPO#R zbb0t8Bk_AuM)qOLhO}<^tT~@&x9Sy9vs{UV9mgD!2hy0WF9?re-Mdi=`OT=F&7}vG zkpU1>oh<$hor~zX>RwLNN(Z_eugJroxdR8%SAvunNx?*T-SKl*-vX7d8yOf;%O+F6 zRlTsY1z)TQ?C_U>QK1`HoR~!>-D12O&ej`k@FI0a>$_Xv@dLV#HA19vD5n&G+hGst z9oJ%*b`~{NYBI`N>;`Xx*P0Df-qU`fuij*qPDYt(V%fYaf=dH80H%LkQ7hH{bvm~Y=svLqcw5+% zIkNB!AvQ_KXm=r(IT*0=@!Huqx4F15LjJ1Sw^QD)q-lUk2G)`nSg= z*Pat7K1NEv3!2$HM6dU5I^L4(Nwxsmk=uWEdz4vYgi4cIE4DGFreA@fKG@YaS0UK& 
zhq$z#HJ2?kxe|jlmz|74jXk07Xdl;1`L#1Ng69y{mH>`1L|z!Ii^E=B9*jByLI%aQ zFT1KMBz&Pa>+5k~xo`xYFdvICvXMmOGD^xPe8wS|=Nj7aAXIoakgUCk$@xSW%s&zJ z_4n_WEo{aJ&0JsUp#v#3s&;}A^~MC__IH*&CP$ac`Vvi=HutsZrC$PMb9kM?Xii*c zCOVbHS6j@^F1+z0bSe3&b#V&HV2`Yv#D!i&T6|p;$ABph!Gw-j94FSHT@v#JlREDw|)~5{>VOQ$CBJKYJg2PqDJ^e7?0ev^AEB~#H z=cA1>oMRfHB#EJB;O!^t*vknvudt4w?fesNVy$30(($jSP<7>ba0@AhoqbOtaKA?s!lt6Eqf0f^xj3mj@kpR?a58+pk>Ir5zjBLcNnrWu_R!TS>H1|7khGi;L@db4tW9a^UICF$|tmhM4Y zB7wFJ2_rmwd7{uvicog%r3G#Z@Z`aLOtxJ1H$QYXL#$d;G|`SfnoOaV^@o4o_cWm6 zd(y)zYaZvbzt@kuhfFo=aZyq#W(v->-$Y%{LDaXMTQje|ryM{Q7XcQM`XY!Lp9%eY z3-Q{bFr~IYkB~o9==o2P3f-LswO&=o@wj>90ps!}wNMEbE1IWbj1F;&kG0u6#418n zKo?YQjpR@;sleNZ01&Xx^pJhL4bY)5Eq>0(x6YgK5>AdRD|Fetyk+v1YrHvmH7A-D zXX{flcOSZ3<|#0oeyHxtT{{m;)H3r@qX4245NVO3wUYNj7rPe%p$ zv}e9PZMs89uFb02C$Y4s+RHuG!Y#wJP(wX!ls87@EktzXjxjUkBCzS&5>OZXI$ho%%o~>zP=c;t?a~*_KXC$ zezrSbrQQ@vs~WVyhMRg9fkS{BnK>zuOu9T9w7d`XYw7Y3Rs~FP{A3qmy(6usNhwEa z5h;G;wgLVNbcYWZg!LJEphG`;aB)k2)k?+OaDHDPZtIJ?VQg~*KMa$1zV&q*d5ds|Xe2uY;QFSc5W^hQ3t zKX|KO79u3zcc-=vx5LO{Wi8Ji;)R89RO-t zcWBBmhjprioLr{BE{cDzxmQOk$VAu*>25qg0+CGUQ&|dTZM7NRm_>oJS^3i**Mz{U z{T5%D<1r_sJ`rTNKki3y;R^0;T2v^o5#x!N->ieyOsMa{0OJLf7N9tmtXQ>mLD%@B zReKt&+HTe#m#T8^+11y;XnKWYY~HV$w&2A?|8HE`Tlq~1ZN)7zGua+40h4B>f_+hjahHp*A#x=sHEg+VD1iOZ=^#y_$ZM}u~N3VrOHOR-h zRfaA7(Cy>|Sruz(VEp^efc5*HPuNsF0 z{jCr6YH?Xh@0^PcGo}H^DRpTU=sLy7Q_q#PkVt3Q8$1>`oi1^Xzi$_GCrU)k7X3hc zWZ|zPMd^jIxU)xEcNtmM`lBG&4>8oYUwO83emE;jaCu?eq*{!6o9dLPwOpK<4_Nz<^e;lgr=o8(>%J@oc^)e?oefz4+`1Ryr%Zu!8}$~rbfcVCXPMW_0p2KG zy>3|Pcyh6=LJ&(O(f^M;DUTC1a!C-XdI1<)7@ZqBXr4vdl_`y#tA3}CvbrMhZwRrT z*7Jkaya(Dr`XG7;EQ?r&UFdTW6_&N#B@wo44cyW}@UiPsyA5%M>n_0B>q~5=0PO;U zoF5~Bdj6t;Yk>fl2N4mHRJ{Ba^od=jUpPG-~bo>8kkMUD21{z292WDny zIpynNGpJ<(+` ze}%d4?Vwd$0;utDa}<$4ky8e35r(kATsGMLCo^n-3Jy&mPph+-03)&$+cN-fkr;NY zRfh;}Ck6IyXv_*tn?WR`rIo@0KEhNI`+mhY;-5$7wl$bdLVM&!7_1ojzNiBAa3xri zi(dW28r(t{G%HX?5xF7M|7ylQB)S_i4%1uXc`Ro^@3gVZbVaX)D?+>elU0 z>PAqi$v`W`?O-*oDRNqyp+~eKXw8`?2gOJ>(o~H!Kw;%I1?BE)TC5R$@;H)^F>$e> 
znho#^eeXAsGsTz^9*8RDxA!IsB^2XG2KSYbsNA9`w!{V6p>mdJ*2pt21Y`bHxNVXR6IKhExt;X+o1K1bJ=eo zjkv0ldr@C)lARo`e-^CBE(_Eo^0k!8-NNH$G={$aQeF7cZ+)8)t?}^?KNu{}Ct`$i zq(lJUA~cKtlY#GQjf-fwkFoLp^7)^&DBKJP?_$-Gw_rhyQulYVFyt(5QWXM0&emuQ zV1Ai>8u@&nKT2)d)iaoB7?%t_kl&E%VX$NqBX!w&H)9(=Yjd3yk1^KkB2q&L)MeKa zMz+6qJJQC=INw;t ziCb)}fg$Ey%9(ZsNE?Z=afiP5ob&`cG3>wjjN8U!lnyYv+GB>Km*>Xg(qEr5^tW+X zm^xyU*z%R7XQGMGOGTW*!}5%--k=a`E0apv3#r(wiT#NGwVl`W!L#XbNo{E0B;#>Z z%I$=cn}_Abix82y3g7U%HcaJ}0(cUB#_4uDyAKVqfcG(iKp7&PQy64Uqivvg4!`&Q zW&Bu$HRv}#Os_!Cq^UTR!T=l1{ES#>7Fxj)NWk}{q;4gQcy`-YJA`Dm{e2m z0a>M=H=!hBFk{6)b*JAds~=&xFz?Al@Se0sgNvX{qV0*%P6WlRnr47(iR;;v+)6Xh z6=^?#r)MGn=@gk=Wo}Mx-cBpS@%AXcELQ4@$Gh45IQ_o1G0Zlr9FeelrxXn|y}j+J zQz3+O?8CH;5`_Ql1M6&t!JF`Qtnmh_^}yFfkGCn|YXEMPltD=_MC`SuIsoeOw*a}< z2JGBee^guvVpFOubW-a8pwbf&mryD98YA=i_{>coihf~OalMhftzJa%xAHywP$aYq z7=5=X59%UozSHUviC!`2*vSE@H>mtuX^~>ZWo?FQjYTBFQi_U6e&klsJ<`KLULM`PO~*j14Z(*ofXow*0FuGyknmqipMI_@qn31fsBxMK zJS&&S+nV*Ia?%?vU^bw(VH^d|O2tmyYNgPzs{$1|-pVG_W)q=OhjQ11)|AKG(~H*% zGpAo~`p|pIi7HUL(ItFuW>3f4X;*}cVx5z{QjSV&?f`X%Y}gnWgQzL1q>a(g0#Pyh zpVf&jtU95V2Gt3wZ`Fyr<@1nkQf_e??9;^TBv^R@#_MO4*xi+KXoD(ANzh_bP%*-2 zLUZd{Je{`ot*e*w3j|k;?}JD7v)%&*A&LG@@B`thE>0fz{PI#IU-g3sHf=GR{}5#h zqN4Z6lgG7$t1kVg-3UY&;2Ty{j8$E4&48QN^f~Lv5c2A$-P_G>?W>Lq3qg>MGc%jO zJ`WosStuZ*BO6+idQWXsAT^cDY6tDquc#INM(MK1@{Ia|eIM>qB zvKwe?zL*E_qj6y|N-lr{>I=-YVieO4<%(_yz&wvXrTh~d2lo96m@@P)kbgi~R6SP# zD>x;x?1F@FKQWxxng+ACnXUx{ww)pvz)X@sHAIpS(q9r+w z8+xFbgO6V9TXBTsf%p1t`WvjD1`|My84&L^Ir>O&S4Us?;VZx$8D^%StfL3~mZ6Fgc#@#NH- z%y|+^>Qm5lpvnEeg$b}X$5f%AYh%5FsXziAM})O=-sJSM?JO9KTShf*tu(O;%o;Jn zcCVZ(yZqNfHnu;IlZ517Lj&R^K%OXbiN#@UgV{HgCDMXhs#>MCGRUFi+Xea>Q`>sy zBP+y_0#^S**W0%NyiA;=l0||O5JX#&_~-gcv(CrZ7~^j@#jc1aaf4Ow#HnC73c$Mp z#3#5F7I0bWWE^p!wO_@DCP#c?Gl^rWk1~GUA%$xg=i(-?jDIUo!iG+aN2JNow)0+__ zXOt)bvJbTT`5dD5f&zUB$XN`@aCJ%7-O9Qq*$DFlsS z=FF^Dj7WOD9kQVn3c=QJFA4xFuYx$2?ls0gQ5{;q5JKPgRVF?e6@cAvPd}{ZRW3?B zn%vW&7L^FtE_-3Q*kPBMr5CoyV}oAqZ|TYmuz>BGV!`3S#4?2KQC{4O5gif<{CceM 
zF<|3~Ue~O{LiM5&r9VP=XsYG@%~E2|h&;{t)S+!kFcRFa*d8H zMcMyk9RE(nvF^3H4U&xUXl9s2p?2!ve@h{;hW6M%D-!txhL$r*&`h|r6gB;=dhX*9 zT}0Y%@nk5uR|N-8*AT2JnCaxUIe~114E$h}`OjhHIGs;(KIh10MDRDSJ?AsZ*in@h zaVHh+ZiB<1${sUB$W%%W@2MLB-UwLEl2lSj^jUxmj_dw{p|=Zv)utF1(lzH}NY*Hh zzp2M09GAfceO^v~LB^gjF!t;k!TKLvv*>S~qs6KV9A&0ER<`c|V@AdB@S3@b^;%wa z*DsDtl)lZl5iQ>P*{M!H$>|TgW=>nmN+x>fb%M3?d3EN+A8EM!F}@ojJ2-{QSobno`}pS zuB$@EJ~vA^tRftCg_9(LO|<>oEd+KF8{+loO=HuDlOvMvz&r$v%b5t?kzf*B*1y0G zi_k(fIUi&l1VEDCK-%0lCrVLnzrKf?bsjQPD80lQc(G%g6*)PKex|7Klt9Zsy$(sA z^ZQaE$IM@U2q=wf)U>l#?>3P`l>rELy(0=XrD`J7} zkA?i;Q*4aoFkSK>H-#ch<2-LNCE?gH);ziT614SKtp4|)qG-i1 zZ=PfkRuDePsAtr2BdIQNK+TvUae&sPw+`mp)cj! zwoUmJqkfAkxq$WN?doi9FYdbS?bF<#lgg?PV(WQJv&HZUPtoCcTZKJj%=>AS`$nW2 z*{ivoqaVfGTC4f;@cz@$utVdpzM{S)_AdGs;y5+MJ!FYwKYZ+RC#H@fWk}3{9)Eu# zK3V)iPQfFSB%j16HZo=Qw0>RBNsSi6CZ3{q3APG{$(RoY#N0Hm^0~)hYAr405bY6T zihb33IObW0GgqQ?sBxv7luzj4s}7ST8vgL{Hh$$8xSkD+a6NtMQP}moN(xW(7a&vS zNIOrft=2Qf?hs(km%b+OY8|b21g7y0ZMgwOsCvZa6e7TP`wX{B6U5*z-b3( z_NWMfi;X6Qq{}9B)oo83EuEOw36RKWs4vFH^03+|n8xfSV@_mO%}R`gI{|lGp4MBK zJ+1x`9048Z9xn_AX5RepYVY{bT{jX%7uB0~-%xKc4AAwwrM`GTidIxsP$A^RCIP(X z(XD>IWYH;+FOQu1B=`Tm0 z8VFE6xzi9AEkKsYUQaO|e#}>^RA;(4L&2>UL=Yn15OsTOw28pi^-1-&WNm$xg?NuNDsi!#Tm|RZ*!|psh*tFWrA;?HyvM2Ea=(bC-g2?zqq&XW8I739 zT&hyexM8J0UhmdBVZj?(OyPhjL!S%gH1LBXr*18J9vGS9>>-UC9-HIryK;Xnn4-nBghJ1-WdkwXviRJ+J_@^Is=XB=M|ZVo zx=lI=FTnT7n!2tUAl;rz+hTf)Jxx^BJ30Y-e6|V~uv60BsHfjjmoWHnw_>C%O-@RH z^Tvew0eMHq2D4(C^vu#k$+?vuJkS_2u+q?2WaTrR3ETWC?C{ggl01Rh0?}`6N1TEl z=or9s5pV0cv;Jd>q=M=FhzNxnPE4lOOj?S{P31fz2%(f zoWj5)n^x_CHF3iacdmRH_LF-GH{k5(qGQ$3Go({C;4v6=QWJtHB}H1;T=Pspd_kg~ zKJA+KbI0B_9&aJHhgrwvF0YxnXM7hiTx_YKZOwsg1EesC83^EfRT9@KN5F(||0K0u zhOLf1?TgMyl^VFj8*nn5aLC4#NAN$mRisge=mo6L-`6a4dl9oVA_=onQuy5aEm|e6 z$-XLDHLR@l6z2<5GJ0Wz4(M#4XV?Wp)FPq&+u8KPq-;a0EubLmf`^HPk??hE-s@?H zxi^@(;>i-}qZ!45NS-)x_^7DpCk#>*zVAoZxK-JZ$6(f}oiH%UDygLQ!*z<~`*M<; zRuEnbZ+J1hd~=QGok-Wg+b5GCx`@tY6}(Lc$r5d0`jd2Uj`r9c>$PoboEK&GK$`Cv 
zhiJwDr=FVUcaoA5{rzKnTRRUce7m|mb~CTy8#~t}+}&Fs+o(UP`8*Ha_Gsc3)6$p6 zMC)^@1O!huj^1AHAKveDR3Q>p0XT(Oo+3+_o#Dz78GPS~UnWn_R8Ab7i|wgp8i46x zmM20TE-5mpCsFwsNtBk8)P5QLx7FPg!rMrc@jHD-@*b~u7Ts)`Ysv{4Bg&k&0KbY{&8TXDbX%vzkJW@|l zTgz0c*_50KR{{r9gV+CPy7*;FT5Ux*o*#)K1=Ek?QyH(f$_=fSbDwkOG%{eMsXvo9QOM;zsGM$_ z56|xknN9jU1+-mTjc3ho_L8; zKRSO{phk-xqUn+(Wk^=Kz9xm51m;cZ0*l>FyqDV7=+6h+CerD#ra9hjFnjepI^2M3 zV>F|zv~L6>t~8bxo#W#}SIFJz1@1xf-wqJlHpXiZyE6*Ab7jt~N=Qh6Himb@lTJG3 zoyXSaCy{(XnxA*tWaCA|-~8@M=FPdD>4}%K0RRw!~0F zqvhO-#1|%Xtg|}0pHA|+wEu|(Bqe! zJmd&*pMwhp+%*SYmG(6=(!x&);N*|h6hSBk3xjft>SJ~p;i42pNthB1j}DILhGZ?x zZKwOJ$I}iG3C4SDo|g1 zo>RbAY3dV3PJtGGEk!L9BV158(Zq3*wg9Hxg2V#2Y1?isKJRoeRvo=!&|pS{{~+}u z;a;M$C(ncT18LgquqbR$mT90!RH>JP*-@E9!1`$~G|VKz@#r^7f3eh^g~ov{>{7CC zqG8F29}P=a&?78y)Os}lHTVPvd1m8$?l;xWyarWX$zJC5bJAM^g(kKq%8mRkw= ziEQD=@=3eWJKY-0EEXgAR?Y^ZSD0Q?RH(7VTH8N5**4yQ%M#tD$B$`nBC})ftTH7_ zjoLF+v@Jq%v|_Nqv|Y2Duo}VQsc3piSn`9%0=h9de3gfrPCGf}z^R0JB%7_(Y}zde zK~xdE_BDmgE#ED@>)=#e(W$shbWN&2u%H9WpJ9rU>0+rL@4#rpRRt?}V5+aT9$%}` zccw7}@U~mP`Ir6S)dlV=6?8F^4RLA`b&(fklCNF7lxAA~DdAdOWcZC)A2#_H`x_|y-A-#S(_~1N~HyJ zVASYn_8LT~?cs$uJ$M~}8V zeE5((V4J^o1s_FxfWokhiEnt4G}9|%GmR_5I4MTE#wTwx&g)$1Qe4oANK9J4n@qwA zWC=b2K5l!BJv;+^)>x5}dQXwDK07SK;l?C%q}(n^FPutE@y_EmyY z$bC&zrZK<|NOW8`F}dUK?;oCY%pyKTA@14BTiv4`T5wTpiC2^3ZC@!<8+a$ zy;au1e64O^%pko0UC-tWDPL-^xLCbCLIug4F)fwcvyu$OlDg$u`LE?q?#Z9r=^L{d z!q^Z#A1kfx7fd%awFBquSeH>nH~Sa-|#Xl44Sa|?DzSSz^R&kHS>X& z*h#6IV`*Uz@Ad?q-tTL(|GvW4S78UPJR^;-*h;Fu9I;FD?aPPG@Iz)_tNr_EzQ`E< z_~HD+T+7d4@!Z??%i?e!Gpyuc-mtiSz1Td*8#FcEajiOJji8W_aaRG^Ga)NEy+v>$;czNx zrW~Emume+i!3h$6w~gz;IQIP!Z#0~wwj4crv?F7>jTn~>rmd5wPI)zOTza$VOGUs5 z@aoH2bN>PxAzct31Sx$(&@NzxNUpa@4Op-bC15_wQKwFixU^1p$0k7 zGLH4tgjXkMNt6W&?dU1>OT9ToI1kpKpInMMrpXtbMx%hR(hCK zbDb$!t`E0mFc%&5vdAka(Co~+AEBDTf9cXC>327EVUgOzXgi}3IG+j4KuiLTLNg1> zZr`H#FO@-BEhz@zB0VCVVR?&BKp?lgoNThcE*GdCp3j~|Crg=|zq#aoYtd$+T7snZ zV~W8Je$&CV!otGvcUf3j*Mh~5e_%Cb)2;#w*=GmpLmxgcX4rP*!KAbEnL<7fmo4z| 
zL+*;u&U?lTJx&hcheTvAoq2qK$k^D}w}M3Gb~!xDmQ56t|lR&5;whH)(e+_n1X`-ZLWc5-u5O1RU#f3W>f zJYXE?1kE?W-B+4Vxmtbvi`cNZ8*Sei$?)?U&{-E9=;OzaRf#&1<`xz^_v|6bTAbQJ z%X{rE^tF9DO}3eG+re+w?gG~?@x!&BN7wEQ*G>wxmnX-xwRg$Isk))X5wIcw@SJ6t zK-+u*_$k$4r?&9u)~p7ryr`w+yS5?6O}(vYN6rQCJ3_aUPr&@R?T~9H%yq=5?XHuP zgp77Tgovh=7J4DVARwQg8tSc&@h_Nzmeb0^nYy~V=FrYr8t#y}rDbQ47mH&6G^XI= z=kJ96uzAonMkeX-PO zw~2?Di@Ac{Yc?IaXV1(H#3fkInxNm$Hk-i!m;p!VC^RhZguC({#i+eiJMpukazoID z_i9`EQSZ~cH*eX}(cQh8>B^iFxGOc`9T6vWg1}J>qR0~OeGY8$vDx|g;hS!p6P2BT z6F-%$;Bk5tocLz$dKTsyH)95$)epuacBwy;*iG3bSmMTFy?I+z*^@)Y@#608 z?pr}F zK2B8#Ct_*IkA`oGzTzW~WXs8G+S=Oc-9=s-H{Op>fG^Bb@X%-M65RiaW`L5iuK?y& zV}>R?oEsWVA7MUZi+^dL*t23<3X1g7Vo(2e7Y(91B)YYcM-7rJMTTL4dwxm+Pm_a$++|-b( zNl%IH(983$Mn3x+<&>5}x6GxtYfR2*zXN0Ds-h>%$891>ikBAWji&|`!@|RZt!Kx2 zKfKxkZ$;R+!u<3wDUQdejgEprFdGsbu{fjhdmgCEY;z)pdAoin$cPq$)%tH2W-I@EFE1i z{uMGTlPezSG3%vh7jMRi6uQ#~%kEFEk5fz%@0x^`=2cPhTrge31goMIAnU}b^6su` zhGhpNm}BoMg_r4qgM%C2-C|2mPcK{GgjN@>#tbs#@IKHTs0e+Cy$^c|EJTl@cOo0! ziTwo@-u?kF|7k}-yJVLwd(Wu@SZQQ*G~uSHGka%W?Ms~f%#1lW0oQ=@6GE;do2aRw zxzP*}CnqPG@BvC-*zs)h_B}p5^a7@zzzJ0dmohptswFBRp#}5($+Ktk0ep2G9*IRz zn%S@>yeMnQ$nvX#PjGnOV6I&Nr!kl(7(ey)I>S}yMMs?vUk_9RdjK^sL(U85^4zIZ_^7{XX0%Ht)llUb-i8+DS+uH4zdFBLR7?y_j$&hN&d%%ae1GZA zXev1T{iVZn5cY^E#5&DpJ_WK%YJ=w2)e5!)^&CVZckkX+*rO%tDDp~ny^S!n-QuJl zjtS=slb&vjTBh||91{%N4Fdk6YsR1rk`!lYkj>PS`Z>JK+*+I_*n!7@h4F6P-o1N` zXUBBh3|e06vf!LgIFWC&%)EaFebzBF#DR0pcctEu(Y`o8?daj*fn#!K5QY>)LFlRp1T&1m^XGn{v+Vr)_42q3OC_48?{G#JG=XW)!fDaYgD=_9(0B zC@Ptbd^(29hLS{rRw4PBKtX6F_JQ^3naxk$XIM>F!E`QYGq)95m=^*~=|Nj6tc4~j zGcdpK)}Oi+Ar*wbe&a^1f_$2(0a`6^BQOn8?V1-Ab%e#I+76vB8g84>`M!hbaE<~l zgB+bW#Aq9h<8P8U>sQ0<(b3c6SRE>%$1Wu)N$goWa_`=~RXE$F*(=`AlsqEor0lLc z4h}ub^3e)1dKpnsYzvEvXsij^F3?(;Fsd$jXyczAg6K-hDy2)?n23DNM=KG$Ke!CjTr%Qzi={P^)zxbnTR&e3NbNTA2k zt5>g1N=Z4LfAv9Oxx07^uDNFhW(P};eQR85)|_CLJbmt*_{(zv*ZXRAPYgDpoYww$ zaM84JSPFi6iyd_9%7&Ne))EzNZeSCoS2CQGw?hKMHd1Wyh?WrbKJxv$04t34#&dn= zv{&uCTal%lYDk2G6YAEPv&`SJjT*uLVjR3$x-kN#gER*Z9{kkVd8fZFirTos`GI`! 
z@iiZ!c$yosu$mI`ugaz;O03=8vttJu))FY{e-0F6`yYP^;;$GXP7d z4n9E|t(L_wH8nMhci*%MzH$v#w9pEeP<|e@v}^C)F32o!OHn68MeAXq@!C^hsXc(E ztwOldtVWO=@W2C5M$sIMdu?{pnO)2wtF{Z6vOjzFEMC9qpxevP&d$yQrh^K&?2(S# z*{>cl}hNKmx;s0ys~qSR)NGG%+B`kI)+}1l50%V*#HNc zFl5}Z!xiRcmVNt9X<3+<9Bi|lH=q-?rGuW&$DsVfTW>bhybW3%uzh{WVkB<5VS6`lkitg89jU>T=y+;m)le~W0o z*v*?f`!7b`_4M3yDa*FCrfKKF;#ifm16EV|vy<(1%VaxOt6KK$wrE020Ypf%lO;ZE z+$P;8`S|#r11Vx;VgjAWCPqdN@X2?A<%n*~U;ns*0u2OsWL#WaXaR(=#b?sJ78r)y z@Qzypz0HDwg@m%Z-utn95tFWMToK={PJ%9)pHk?q5 z`(1`drf7oX^_ErlYIN0s1(i z)%cC+5Uo4i(N$JfRohbpXpR`t!Q_97A=Gwpj?Zd}JtsFe!E9Ld5-jgTL_{s+r7mzm{KXIbmv@ANBT{cdY9sc3G^Nrm8|8}-RBzmIc%$&*#1@FzwEG6JY~RaA z{n?eYl{$eJ{=uX^HJnvQ&U=)U%Q8bqvd)81xB$gUzNGT<@(|k)>y-K?D=_HS!MnZl zZZ_m>T7A)f9<1ft!8qYyJJ2PLW0`-V1>7c>9YkN950AH7whf({U78;bhSwp>&MxK_ zJSub#DJUe)oY_c1PKQ$IH*elNhlFQlvO#Tf4E1EC55aMHh$S2YgI*aD&qOIh<6*#DR9n){m zq=)sv3FbiC`Jp2a>Flbsfs_sfTySq3nGtv-_yw+0|5{uN0C~UFl z-FzT!rV_OQH7vNLWyiz{zl1VF)d+o|K&dE0g$aTNec@NnzQ*fKW=imrc3(lYT*gQi zJi)nv>-43E4x)@$z`O&UVuL9dXZ#79Q#*NQ=UKb&sJHNQ5BIN*A4y3``N!7Q_L+ID zLVVw1e*SHH_ud4k3Jet7WTC`x;WEauFV5JalU%7P%_)n~kSfJ5xz!uJM|J^If(`B) z5U@O@MErR*x} zL+I^hgrYSt4gYS*6T=HTBq|ELsg$uhx5E}$ar+FOweP^P?Ty+o zpDJvCc5(Y!JODNZkJHQ|4cYg2S9QwM#NqB176??g3PDbxA{U zlsh4vbv^eSNVKcTwa$FB8v#35g|&z;?93nFq`KzjW$O|SJ`={fr(eBsqavmD(v%r>9UZH)X8rTe8{ia%0SdYusJ3le{m5W19XkwAj*ytS zEkQ5i6Mhz0uOq~)0Xw7of`(F2*`2nwHjaP(*#d+tSs?>l12`t@v&^Xlz~Xbt{fTgV zc1EN8_y2^tHHMF4CPLl%`IL2hKi)wNjvI$vNn}Or#IM|%`P&6-1CzgRcI!M|-sz~# zyibCTh}Djpi{+mqD7bE2ZJO;mCJes>zii4T++vyhMiPpZHz4Z+6u_V*qAG@v^%qrV zfwV)7Tyj`1{;F)=A%cz8GhBn9CJg`rlF>U6t(-NOYT=FC-ZZ>cj56S?cTo>y<*zdJzX z<@IOxohO2l_C&^)ZpJaq#*NdO(YqEAwnF7NsE`Y2%P?5K^{;fHo!YDo{Ebi&7`R>I z>y)Wew?5c=YP7$tY#M`F-dKG1stYrwPcOZ9zAL6}*T=-An>HP6QMiBq{x245E12&4 z3Qp6RS1#iUuE8tj+4u%vl{8F39tb&ujzy96);QG3187zI@xeJbP;a}wr}cL=*4MK! 
z>gwvB(q{hM#wd)udYAr7=tF0<_v&9tpQ5nfP3a;8q7ObidKcJ~inwQCSmSFPVU`o8 zhK2^Cq@{K1&Cl`}r+m-87uBEtlgQ~ z%+1U(C^l1M_gGkn1J3sRs#jhtW2s^?ihQ9J*4Qx@1w?+ndVI6u_{au^DvPJd$S7$2PMQFzIV8VM#=-nuV?T3-*PRFXbQI1NH}r*<1;s^j4WYQc!a< z5JK#}m|5Ww5fV;A)*MTkY$(408q{@l-2el+FyuT?4-VU}kwGi(W)O~>Eb16WXyr`FIg^%pvI;xnJgfGVQ-!M&b!j4*AdtWicsFC zz_w`7HBwVBL=%clOnUs*2gH4ryfi&h7$V1mb=U@Od(OOhZe(XbKqM-z>9m+v4@d_L z!dZ;5@N0QDnt1{Ki7Z<-1u1;X*|TE!&SPK1kF9k;p%f^3e|-G-`_Nw)N{<{?iivY} zVmBW7w24PPQR{qtwf^J>>~wabbTL|S{5UR1R{gmVbtv;1nBikUARcWa0@?*!A&hG6 zDUx^Rx1!K#%{bf01BN&2ts8Fm6{aoPs8nNC5Hr{`kDo`SRj8l8gy>aPJ#ezlf} z{Q2`IXiY8BGKk+i{Kj*gUnDxeCQjS3WeXS{l$63RUM65$>I z)Dh*@S(8y!`t?~*a*h2AaxH)jq(i;a7?XnW^_W3TLhLksS)(SYqMGE6pc+vjbGKuG z;D_1-rvH3b_35_GQ@Eyp6V!C>bI|1t@yRul7)}BCFLS>>pH7sxkK))>h!z{f{}z~w z@Y*O_CtkY`ubuX{-nl_?>2BhVOKBQY%jTZF*t&4e^2KJ@b(isPbFtIEw$*G z|F6opbAng+`&0k-pXUEOdbqL6x=Q z$S*f~J!w~*h_jcih5d(%zkXeW3R(@>6=J9=#}cuCJO~B`*_Zzz5c~Z3b1p6}S65dC zI{XnB&;CTtYp-_c>gvKZ9aBAK3*Nx_pMQ9$zP=t@L-HxLJ%*TUz39qpK~-2edI0SX z$;#zafV!R|AaDcF>xiTvD8}4MeHV(+501a}K;>lA9lZMT>)DDUuBdvcFt#{%ZVU8V zaUx#r`|{7Waod9Jz;7t-4Oar^23BXz{Q3Kr4z7>HuwPR9X7S5jMumA;Rt}4GpbhLk zUcRMW+p#^O6;pc`ScHK^zG%#lW+S(D1>c|Lz%gT;=V^1cyZN=Y*YVDuzbufw&DnQo{l2kChiXBW5eX35;y^(1 zE@SDfHR5nbz!u|rm9wH3Q)e5{7^URwE=nMPa+r|AX+yeOg*`Ge>_PVO~Sl zxp8gYiUm79DOVI=FQ86fEM>!Ua&qbsn_t=Yi;|Jx*Ds4*p$gwS+b~#jo>I~KhY-2i&=HjrcX!9JlI*E~Mlj8LQ>UrxKv*J)0v?fPrQ>|A1c+q#D3d9ykex z0W`5Ve?Avz%oE&AJ#hUyd>_W2h^+<6=kRphp{El?0g|dLO&1S+mVi75bD)RY1gtKK zd^cP?zyYg~yBoVk2jY8qyl&b8Ks{CmynYR38j4BBZsgwth2RKF2`QV{%M1g30_uel z9yDaz%OYww>UxV6^RNdb&NZmaXbCD@vwre=uN? 
z&Zael*4J$ZvvC;?ar0E6L&>W+Y#!HLa$u8;%;~<`y#qjbCyb1;OG@V8O;@CNV}~(e z$jfjEY@#O)awlwW*Q0rwJ(|zce$4wCi=+C%1-SH;Mg632$>y_}FBuLSL6qJWE-u4D z-n?_?ABHHtuU``-&dz>c-5+09~T}@4mNC>x3KY_(03P4IG_4W~;m6K?B(mG3AN(gfL)#S(eW%g2 zzP)kUtq`?Hdj`X7-T7q>1QD2CWNC3>78$`0kd5QMqRR~gA1*=1Ul*eicN=j7J!5y& zVNb_`A3U4CK<67JFi2k7hb9GA;^!Lou2@Id+D0)w2ZMG!N7hvlww) zK1Du0_%A0hl4jKl)6W0A1je4oLtxx{YjmG0Xx$U`e!3gcS%v^Qg44f-4;hlj@L=kI zBFWi*g)ZMJo8^pCOIz`9ve_CR3wUfUBg7N=vryK0B||HDFww|6hqdcR|Ig%FdN_Ob z?AdhTmk^TD6NyPaF)=Ijocj0EcA}PnqpA}SP)z;$MU-XkAa+Rav1~~X3ln;L@Yy7! z=1I)i>Q^NOgJP?W>gvu}u)u@XQK{&0Fsg{>7Ef5+aCdzHr3B)!Btc(sr*X`?NBUU` zfX(i3RNNEOP|>y~(@YTJnkvjiOw+Ce=jIRdz5C&T3EF5bfC=s%rBOST%jiOSA6}2S(l9-M2j0DH<9=j-NPr z@;F>A6IkW@ckmzNU#SCI>FYC$Y8jDm8lCEU_xSMBpYS=si6|Q@z#fCbz6Y0ag(2gv z)X$Vh?x};YCT;qS90|T^vL8Eo9s646p#F2i62Cz*!FvQNo>gHRvwAkdfC_#U*TRJy zp#7mnNS-O@K#&13VduT;!y>T|16XWdTbB zKvrLOZEfAlXy;@P;(KsmnDoN~@zW7_VRQ@wA_85WB!(u^bcd zoKag&W2!Tqjzq|-=N0|LnqyPP9_>oc$SA>)7Sl_ArKC}`ePhgwx1e$_Bb>cTODhmL z5clY4%Jap3RSmm;ffXpcw{bRpr#l`M{a~h(1IkM|+)#4#l`A>_Ku@^CQBnQTt&)zipS+_@-j z;u9Mc-;YmH5D{V5cj}-2?WO4qq(5XTVqaTpGSc4kE^KoY78RXNni%OlU|(OQrk;5x zR~)LC2lhb1x^*weCqa;)EE#)jA1_%QS=7Jyrz_wcUSp3T66L~w-~(TXh^Xj}B2NYP zIr|h~9|42Gesf;g#}GN4?w9A!pa0X9*1Z?R6!RL$PqM)iQZ^3Ev5`ML@SB2o!Q1WZ z?Btp^FCs|+Cla`J8-fBDrCI;#bTEIFq-6whpQo6 z5i&s>KyhrDRQ!WXZ;Z^wm#n+Od+%=5)a@$P=TWYlV%hBib;=rm6joAfOl7?23xQ+b z!6s4uCY>)eH5`*$(fH$5)AzZ<9(sPRkDw2DCMbYHn>H=%&KMAYr?dGdvEc~2M>37H z;rbxMEJBs6kN!>R>>$=KmQ@*E{Wove2D)o29mJSZpC9@!t|U^(e{xD=9ITaw&?=y? 
zT*Bc1FHP=)klE`97$I>m4BU`zSFT(sN>@)TY_>7e*9LCyuUS`A-2ZE&I&u2UncF~_ zHC6hH?*GtRW3vSrT64^$>q24BLB3C3Je)s6L!X~~40J89mE(nZtpNNAvdI#^9VWXX z3M_%Y0T9OnI~=gYXW7mGr6{dMg?V6CNyMUE11w901e} zHK%Rq)%@kqD3uv5kB2`&hc%my4Rx%u8^md#57TS|tVo&;%fx-_k{?AOk8ILGeY~%t zX~Z~>GnnkppZC8?Nx6pT)B_ahi7JVFVCH;WTted+crjKF1i@`MDNR4IwJs+oCqGS0 zyd5}mp*Zl;iBrud@rWk5(%A=239J}{!*cFc4${cjlpIHT^1=acC%Gc2&IJ&)74i+9-Z*PCfjW0h)htE!GfbV`|3AFMernwVwN5pZCL zXYJ8Xe;mQ=NGS+LX{|z@I8oZt3uzLsh_g2}c|&uW?u6KfQo%;R3StBA$wydk-UPQx zmk4ve_$}w~`*E)AF>~{Eh;Z=D-3)9S$_!O>S!+Mg72hKuFE20o+vMAeIAZWCGPSV*Zc-_I!0$ zZC0ppYVOT%7EZPMouT4tGhD>hr+vh7G!2|<{@_0;=FpeOZWma4!x3Lu1KrKdQJVLS z3WFXDEF{xlZuhlEj~?NX-O>+SbL0R>-@`Qu%a^OdOq!oHn>3@7KTvZ8xU@t*umABQ zI!SleE-%Mwgm}uWr$!Hasj4bS0GYNH>~>O(+p{d2b~v{tGd&QR=eNp1TM=CgYfQ5h z$EvlN=Ndgg6J)=6vqj$V&tAh}0`pmAa<*ON4%M--D`n1q;NJ(^rYCaf>32FQjqmW- zI^>_caE5?y=@=p4qaJDH(%+bdm>Vf^JbQA+VD0mhp1CPcP^|#P1Qg#c_et1!JjV~g z^tKO*vXkWscy9p%UlCi<9pNmUHz8H z&y0%&;z!F36Ttu!ob%u|RPVXX%>yY>)p2aF1^N{Sju?F(#u@Lp|Vo!yG%33hD<`*c6R6_=&h+-wsGS|@1Q7% z4&;$-K+svBprBy6>t|h^ka=17wizKOBZN#ZOEl|5Xr$a%KCUAHVuycbCKQDXWg;p84|h0V70xEE!b`dpFPGlio#;oHYV~*auuR;LvwF-9cr`BYOlQ}F|c+f|ISdj{NA73EFn?y$7`PE zwQP7-kPl{s!X^A5{Ug{SnA?ys^}7y!Ni^0t$7d|^}Kqzkxg#an(r-u z-NXQ8tuUxI4&QkyJ3>1Ac0|?Oc;%taFLZ@-QMfJNzN{n48@La3n`3`Sj9`C+QOP) zTY3T%f3T61lG+a^jP4Vc&GvntzUe$nL#D~b(|de4X-C&4ejc6}&00>0AF82TTStI! zE$8dRJWV|b7mN1cg!dvCh^z=42GcC5$?P=Kf0v|^XB zwcBX@V5^QmZ|co{%wp#dYI$=Jo3P;IMiCKZunkQ2n!YqvXm#V!-tl! 
z)FOXabe7nT(3zyq!nKvdf5$S=fh&;fq|EL&9|)3YMi-JO!+|XwF<|$*^NypTMux~w zRE5n&Y4F2Tq;w;JH}|#y;GTJ0m1dg&>@W|knE{BD29%;(QAXiKpBzmQMnn^F4W4x& zMUczCfy>y~_&72qEL=@jE`juV2fD?bYdplk9w$gnPEO+F2ktq1e5l7fzEPTITqpC&cL9b#i@|L7_&0hrS76_dcHVw>21N}d{LrBK3-1vb4-PF!&p~TVA z`Rh()zs0_;L(NiLUeCPRe#MRLZGd|C;8{^HzryZSN9^d?EjCtBRc-%X6x3aU--Qb2 z8oENO(#l7lO3PHC;= zrZ}bA5+`)>^}-=p#~mT}pg63_iotzmO~ZfC|1=+P6Qy=Y#1mdCEz^}xZ<`$i%({*( zZSwP8P|!-z1CLiSSK*v&k{gGBJINIi?!_9@U880*btm66linYvwXM;}rwO(=%aFp& ztSD>t9(0$fwlu@(#;vqx#m4@US)aeBSg8rvEg!gjq20z7Z&5Yo-o0l$s*#$;N0QE< z>n+A!cLa!YSCP-Vqx=f<5M&+K9D1(OeV&;3^VsPhb$Hu;J5j-o_Ft5>{LRl z`GvhkZ$Hu0A{?hVk~u!+ z;B5ni_*U_c(c6x3xn4qH;ev(${&-or~PqzcXvYXmWSY3 zPkEiWbL4ogAJPUKZ)jv>C51|UutRo{3b1Ycw1tsLywXayxXRS%2rd&rRzZ{OeufJs zl=vF!G@Fj)%vA})+*4vYg1SIx_w5ICvR+1Xr$49vuQ;!5%W|j6{g=5|r+QP3#>L<5 z?Q}w8ty@dSp+kqlp^z-kn+)GJOKMHq-5pr_9Ay7@KyQ6FWJM)(u4pg@Xf>0q4ebkC z$xO87n}Xk8o|lGhzmmi2eo~PHJO7Ic4P01X6O9_fGVh#?vIl3$k>Xy1Sa0!{BdFpJ z3ixX4bo#@!=M#D2tUP)LR8Fk+`uXcC{0{NcM&19IOK$>k6aVVdmto$mO@&=i3LAol z#IB%W!~mT|vfWYJBS)uytKV0->tJcgOkAlq7(Pai_^D@Oi=^%R2(sHzN3dnT!Kc? 
z_-1$#FKWe2X8O9m^Pyj+hgpKTg^m1s!ZW@3kC zlR9)RGVl|m=cXddOk!khyS(6e;PV%>3b>el_RCW?YKpkC{vP~nFj zNVQBKK2ZL|q;On>mJ;N*Q5oTt5sHzAI&s9r3HXjX&`8W9(+vk;gC^oQsT$U>P)8Oz`{1SI{n-d$>3j1g84vS++L|H#qPdS@ zMo$(m2Hw9J65@>%77nwIZ1Sr85B;hav=?}yWMg2LoFU}W?+-`Ft5mz*<)Hq+#EPHD z=km6JxDROl+&vPwR+nMad<3?^%$0tW<060;J5z(+7HX$8sfCyq z0k%ih=g1d{lOTkc*Lit)txm4vMHeGSmz54=Laq*Jd4t2rcC$CcPjvqdy)o*11u}Do z3mC1DaT|5poOoqpg4dtiaeq)p)E`|L;YBtO^rqdMmmCY1bS{ii^>uY&w)*|@V}Rbg zCzq`AFzCisU*tO0YlG*p>z?$-))$s2XmZfGMwBI^gq`U;K59$d_JE&Y5E-&3K^=$X z`F5Fv#}ktS2(+gvpYSup_koxD{4MwGTXys_vxkj-kK;qWE@C|qDDVt~tD~jm3LXv{lR4Za7vE3rDEOk$i~8c*F+9KUTMh6OdW3|W=izLVuk~~A zX{WAKJfI~@mriDG`ubk<^W$cejEt58R{w&wmND!Z%=QKh*l|k%*I=vPSn#$HhoQdH znK%6Z;)MAg2&wr4Tbvvn>wf(3Kz+A#>C)GbS5N?{qK=rcW;A={5v^KkK?$jx!J_7N zGA^e)!MkH#7~Wx@HmeTsj&X%7`hFn4EvrMI)K*2s?;drvs|?1#cmIS>{(^Y)o{;Ix zCYyF{2u%!=kU8{FGY_?jG_3qD0+LUaWM2**YiW9an{i#f`|j(vZ&Kw{{rz&y)f@3%{tOK@)MPk7rn^>TT8>;8wiLWAvdla9>c|TS8BPR%+l0Vr z`^Vay>oOv(Qrz6wM@B|SX7+X!TfKS)XmQ5X(Z*wg@uK{lhY#Nk4v_lDMG%do&DB96&2>Re#+$}7A_KWz0(W@e!k3Qa0w|bz~DMBqC z$=1pTdE(x70@vC+S$FD)A!nS^ISUIVXghsomDq?Q$n^GA5e*Nard{~+>%q3tSub@K zD_fWKa@mFr^9X6=tfRdK_H;kj)-X`^V8_FAW#P4gT*nR;HIHO9M4AHJ@^yMU?)1O?qXIcDE9L+7l z5)zswcJ9;XKLxXeCZ34BW$V12&*$TtAr!MO{=wKZ(z~zK7`A#^MO~R$fI$H01>spa z+Mt>`AmRj{??lu2Os%Y-huWebc|4I|j(rN-!pmP$9{NZ8{AF*LJUfm)advyn4RK`Z z%?$>Zmy}*zUiv%whq6=TjcV{CoMs~N(=25_#iH_((6eiv?_JUix|JxS+GnXa=)g=EZ%Xgf zm?UI)AQa^nKutNR%L z^c%0CdZD^K&nNf4e7je7zrJy7#xrS;50B7F1W}T_px)goq7Fl{AuUE{ z_>4JMtwRB>nxyk1c3Qj0L*CvoXua20qd3kJNs;T_j!)}>5M&64{=w%buBl#0LsO&y zXt_UN?avMWHXW>%N9!Z3C}~BF5A%*dCR1%LE3X{~Knn+;ux#1QprQN7k+3xlg+gxaXtw;^VOoO9V9DIVLwBssNH49{!P!3fI#rg zMg`*u%ti*&Y#dxugLm)J8;+wSp6>w){Jn&cfq^hd`UF?eS;^YNxr}O|c+F{_LmM=9 z@Akjawy}?gm-pd@-O3{V+{<>h7(%cm|0{OEQ_JQZ@)v$lUp0Cw{Ilk@Ex#+79WE0z zr0tI_?)=~FXe#h#fvNLCcI!xA^*d{PboR7ai%DyvNiK*sB5T&nB$~olbaRqXQSpnF zv9*rn8z-r42@P7!si&t$wruj3K~sBxHLpk9y^WpSE~ANa4K^@ zn0n(#8r{lAS z_GM&`JDRApgNeNq@W(_WUoiWJT>u9s-`w!+2f~NcZ}ab|QXr7fB?wt~>AG=Ta_{T! 
z?+$e%e$Lv@jq%{3`TBY3W{Jik(Pf5c)5`%o0K=v$e`SSx6+v?=t4!yV*$Q*$XtG|q7OPOF5r;4DSu+$f0d_r&-;^7P}i2CVr@KvzEz{@5A*@g2JU&;BmYz(cgArhSl^V zhUyp9^!8cq?C>=3%NqFrPZvzY9){E+%G8cCb(R!I=|TV^vshKsi^ZBRngN6 zJqGdpiqti63_ROH^rKVfn9U{qgJmEPbPvy3I@5o5zs=yPdoZBDTgHUB@J=C~w!HUL z;jpEJ#V46&M-*&5UyVcS@yoz{y~b|WjM>9a19c#v@`@y@pZr*A>MapjyaGLc@b1_r zCszDcPmOZ|@XDx)O(?UOSA7!|Rv47?<>RJbp1ygpaq8QUgJor9&Ay@KS@TRp9Rl(_ z%U@y)!UxlTU{BoM2V#%=lmlEJFoCnX=9d2`9iPUzFihgi)Vw@29gh^DYsYX~O~(^Tjf)_pvv8cq{Y+x66%lT>lV!-HHbKx(MG zrZ7j*E)B>wHOyi1K@-`SaA%u7FaJvbDlb%q z?D;8+;uP%TwKvZ6b`;({pJPHeMk|S|b*rns^We5_1+aU5WudSYGIeNB;2kWL(lXLq ztTwEl49|MX2Zt@&yQ}uv-TW4T7=ez5z1`3=~UV79rVn3&@5HLP&p*akFmaf5Z zEjF9WldapdQg$&iv-@%}0u5jpeI@HMVVIlLTva-~R5d8OtZW|pR8H_DXjl3VCWZIn zlw;9ExLa)aWb@7{#zQVI&emR1GUsJ{LC1anp_ct$HY_BL1*{JE5E$TiJ0cHGan0rA zZpQ>yy$g2=+^lEM`{W{+C@zJu53V+O4FTB>iYk6Q=vo)eh4)h?CZ3=z`u#>@%X@ORYvD?$$8;W4#`6#B?P`WHZo75_)&7eLW4Oa$Gj$Ib zL+QcB{DHOZSCx6zq9hD(yf|Co4+CWg&Dnl2!8Z|UCVd*cGwItKO|w#tyGS$3o8w)^ z#~4d9&JJD)2{`fy`!QjJpq#xwr`nh>H_gR53_jSJT*Xtw;bxd1MVHK?Jr~DEdZ|5* zlVN%D3dJnuyUSY)j%qB+{`ClKh?DXc9b&%5k2FRZ&%~rnhcwz)}+Ms1aP~l&;HVMexOU&5usLgVE;<)2|kQ` zb|o^qs{m9VN#$fjJ~jM+)W7Sl-D@^Ci6oTQN6a(afdddzJ%nsjKkjZ1bAWX@ zSl&)cplzQ{o~}^KR9zTu7zSv#V1wSO!j2U~G&>O*2;+(f{X=H@t}ginOMiD&CI~Wb zTRPD1CJ`j4Y`wWK+S!o%Q?N}G(2b><5o5VRZ^>jbA3oE^yg7wGb?n}o9)OZV3+}Cv zZ$W&IVRB{|@CxAwNi2OZ=3W=BxIco59v6A~d$MWOa{Uc$Rmlmt0}~w%xe`CVCE}1l z8Q@@GoDQkEpp>d;Xl%Siya!q*&zYF$*RPe}#hhy!{z~VDEqhlg#>J zAL*t^Gsk{@OR`s!lWf9CRfU@h1==<2;p6bUvIal^*jcxJV#CtIp(WWiHNaZA3zsTOaDJ^vp87a&D?+n`<0wOtZq`4e$6_HdFYy1d;`AfNL3 zWQo=`yBD*_Ga(cKGZwkanY!<`wK?#wO5UHk3v&)o;1l;ddD^sseLg|=v<0AX_t(6b zDr76KXx+*M2hSSCRe^ZB6B;y`m}S*wekj13n8U-fVto6_eNkGc#dOB+FGEgb^|`{p zbGQQ}+FIMEA=0M2=lA{sp3 z&c6|q*^^}UFbXV8ep~q+2bHaD7w-@YHqmvvUY8gNS(|ntR3bP*IT*$x8J)BTND)g< zCP?(@in^G0jwfOFzzYT%31lwKHvtKGakIm30Ap>FGiEkuak+@XhI$)x^quy11&LlJ zSVYsA;0kR2o`sk}V;;emu``ZCZ85FYDH~AREX~s(nE~_>`K@iG9nq{wWLievW6cN; 
zdC_Jh0dK71>zW$9uJpV2!22QL@qrkt1EhtBE(P}^fiH6_xNRMKKW+s7@lWPZ`ycj;iNgu(Kc6CMU&{KgF5Knro*LF)PEO~_>|Ww zl1b)(JyDp508J$)mU@^iMc7%u(74L8nC38c1OtW?YU2mzzwj2P2L2Mbdy!xA`Hf^k z!akLGjX%NoKeFL$t5M!E)okUC z=$b?pi*-va3*#*IUo;lU--(*N`D+7aEv#LqBiM8bBOJcGIv3%n*g`Fks2?tNxY^FZ z!Nl;=kr-=Bv~Kt9AO zlFe(7F|Q$fL(+#!@yV4Bq|l%pI1&JI>Iww>AaehI4+8h>w<7xWG(MK*;{$+Sew`DLy1twPCZ7OW#DD?1(h8OCZ#-H^fzc<)ix^^Z%nf~m(pqpHcq zsTmp+_VBGFsA1#rtZE!=?n*POsof(UjC2mSozMWxNdY zOl>J5wedznb~!RQ-2TaU&@t&FkaWZ0(&v5V$DKxp5|jVh*6{sUdBo0_#gDcjw2&8> zP9NI*W4_hjQaGTiJ62x=O-%3ydNgSCe<|eFI=dJ|T+*IVIf);=0XI&r3@9{3PKA3} z?)+H=%iSuNNDoBCMx&lcgGIi`fx&4y4F6!AwsKrVmrocv;>d;1sUT%E3uFo1OaWW) zW#E-Jd3n>I&SD137UR%tY3d3*;Arbwx&I+IDRrbd zOFCIP17uPg0XF|UWYNjsFhJ10Mt$$iE0s;QPYs91MZ6!BAZ62xXXZ5|Vj7XbD4|t< z&wcr88*47GgR5evHdkskdkft|9B0lgj~`MS<`cVtFCw0S8tb7UodY-1Dm8}7ER#Ybi32bM}+N(~zN&RdFyL_cJn(Z2xBUi`!? zoC5NzllXu*K}`zYP8i}r5()L1Al`06GVl!!=EQ$V_G_BNMCdXk^a}0{IPmx&sqfVL zvt-GV{GuXNj5Wl;rJ)rtDA4c))@^_H<0!pp+8wBV5BL~i80SkiudTTIa)MR0V=bLJ zTfhtLQ0OLIjFBd%zP(sQ1S!?zCriJ9PepqPAKbJz)vVrQ1{l;V3A5@=^iimpwZN<*pld5-~8&|UOCcljyZjr$8V_)^nvEXMm}I9W(S^1qoSA}zsw9)Z7MbH{`>{Y+By9>)yp-`MdeA8aoDgP^bG`Lj(&`24^t^~6 zR5aiH769RyN#)wh9v*N|@)L&U4DmO7QCJnATk zm_|wq{UdsJE@P+}G`}!yQB4ekrXECugrrvWA!>R?V0wD&&7>D*-+4iBgOYSbqbQwS zusen*DOfB2+`5!ogS0|}C5?)LufYfQ8r}$ewSIXf0s`}jZH--C)4&U88`S$q&DG!v z_X35*$BpP+sGmZ^zOnjcc3ya^jcD&dFPI{>C7ipg#pfh#3zvEye&{JFI}TBSx^K@` zz1-csa!YC9I@IITAd`Cz47cFmIP90I*lja^lpLI5Dzd8t0;%1*#W%WWj=r{dZgnM` zk#FHOTEk zO>L-TGSYGc%t&AZ2gT6kMmOhR8-5tb1%M?a4oCmDhD6(p-``AD>O5Ht)aC|pBWKrnfI zkZvmuI%A`gFaN#=ja$kkWfMg8;DhHVDQLpbpuJ*+5uPf|jE7il0iRO_f9^zYWqvAZ z=ovKc3HkOJT zSm-VV6=^Y$UiiB!jvmeoMzqkQnE3~z(Ir6gVg`ZkeGgf5!DnH#KME~xG_AcYE}jin z=TnP4*QRT9U3oBP%j(yBMK7K{EQe5?(|A7yABfQXFCxy6yhwu9>f;vGiH<^ znc#R}V8NQ!s%Np(kPCzN<&Y45bbXosJD5_F4b6?m5M=W6^H-Me&%{Kh8*J=Tl{nbY za}D=cntItdP_ji&!#0XbNI7fh`TX8=P?jCXY}posC`yCquoOmj95mwR)&W=ZRUv0J zqVC@Yo*O2yWB-0)@*4z*9lC@W&!`v$P>vI6DCc5tZ*PDyi^h@T^A_fz`KHkZH2a&% zl;m1iY2fNi 
ztbb2-rG=G~nHR|MUczP`UsxuMbr!d)ep#p}>?=<$D7(!A=J$Z1kCs|92QRg$% zZ$eI3S*ledAZl#8?}JFrV|N$#EKZE(`a8TNcA*Jol;C!&WB4HR;~*%XP6sFPBDVvt z*7jWiCYM>WQrCq%^0D}*X_re-6&8h>`r?}b{&U911{5&{Vsr7xk$ad!f>|LUGofzd zD{(pojp7CmT)d;=w{Ds7Vxzy`r$0rd@dwOrON=+#VALo0+BpG<-_X0_CZAf&vm1}jvgBWqgEJp z*(nGmG=`tbVQ&$c8KX{tiv>g`3Ugqgy8N$Rp1+AIgLTFj0qlxL_Q+I z)I-?_)L*BY%1y&A0OBYBQrk7A8_|IUP48td_@8&GGWY*6K-V)B4d6&RPpovIpUE2- zIn&J1odIHRz^q{aA?`;KvK95GG>1t0>0mS#c0BLhJ0bL#b$mg2@&|;ur0Sn^TvP(z z*V3#m2qE&R#%Ll(XBJZLFSIrpKt?AZ&KmtYd=Z-?_C%v!*MGey6>TvmSUhPAGmX#& z0SfbWHZVF;RvzpWbeRb-mPzLZFnp9duus&0jR1t%P${Fxm=49|E#5LY$1o>%SIr8z z$q*#ALXLurQk@0%$j%9?DQ;A_aIhao9uvQAY;_*^&wJ3WUimjGeurPe4ZOd0?A|w} zhn7q{Q$DomvV(m@*V`w2kDW$Yp+r4PL)I}z9eEelYTuBMH3_}o;Ex?UcJT&32H=4i zfD4)r-C$6SJis-hf$+u4;twCT!i1=$rFAnPzyr@2%|q1p3eO(Yj+MHplIZx~T{Le# zCnjhS_QdMFE&YmgjW51AbLSec-o6el{eB(uahh4)P-)Zc|!mS?@i|py? zsf-bbUwwvQCXC;65#^rcl|5h_Q2n9}F24iR@-NK4k2m~h3Dpw7&@`qW+?Olb3cvR5 zvNH@e=A&Xr;mhBtwyL--VGDH5_rr;AW9Hw!lnu0Wc+(0Qc&izj@V%hupF4b6RSllC zrad{`FE2C<9CqwUTa)?tXIea%&ZtBsRa})dF&6Cl0mRm)7}dhKVr}3Yd-!ZEK4NX2 z7lzDYY^`wCZ%l@R1)heX5E;$TNO0I4k0CR#&Li=?#!V6&%*rD##enXw;9S-N_Mu*5 z7#PIxS;%4q`f{*{Z(=bqKY$&T`@aLe{9-QU+MPH0FpZuizBGM_L?xU9OIvnrtsn@L zu^BUFtp4;21OBhUCA% z%4~$wamd{<9QIywkXFoYp_s%LBm)|$f+Beee6xX-PUuQxxtZJo5Il~Vbgyi?1O@)RqI+v;+0#W!D$)GSm5HBoxAI7lDm{joqrG}AA4f-d~{U}^C zXaj*^4mUTZG+J6_qqgmzYZO5e!h>&5Z z%~;apUxbk0xs=veC0EwIFE+OWHybT-jL&oeD_PSRuxg)bV-f~(FjPT+!k-Xv>kxOT z*Czct@TLV+UivjF5e4 z(N12weOf^K-jykGW_q}Qtk`h+60-4mpM;v~JFbt84B+{r&Kc;;q=6rRUU{Glo0`$q zzXfaF>pbt@&~t+Oza#QJgWY9elU@`14FG(~=E}EpBlZu3)TT;67Zt!^sh5i|0u(V+DC<6IUXNM)*U^^9D51n6L>N)SPWuI4 zi}L;sP8AMs*v7%O1Xpw(-yhj_G0G+G&&uoF2ikvaQ*qDpJH5Bou7&@0+3n~p@wluF zR8SS@3GpZV9TEuyZ!-LqXMu!QHp?D=}W2dh3czvxeGNlQZ@zq6Pd~&|;84 zqvoP80_9d{+%?9(P+nkmc?!}O8>42t+T(gd8stdqx~-^+F#zJ| zsWvYE6Ydf-&*(*--aBn3@8r6y(|hrJ3Qq6cPA+vaNRrPJ>#YX6Z@L*+AM7#l!7l(o z@9+fZW|0PnFr=0uL5Sn-0|Elw&alCda3F0~eHCwW3GxTRpc3dI>Z6F4MK6duHM{>8 zfYbr@mj}(a%^4s=3hncPsQ)WcGN3inP#}<%XT(CKOcu0P#G7c7MUcMoFZ{dDrv%_2 
zKs2}eCS7_0v}v2z-X*KB@*Ectw=gG?hA4IRZP(_QZpf=`$hmkE7vzw4sBGDqcDWsf z>uHbM4qlKP-bj)6MqdGgQ%HZxsbF<&_m_BL-SYLL*2{o2mf|dC?nu3gQxH{_zo;}SVg8J*$hF0POCSS4A zoL$N)9XR%$AR{@v#Om)xaBi%Yr|fupR!?xy^2>1Z(hr;UqSu zB0=8=`1fxnfWG;&N+%-G7>JZEfC2Es2BJHSh%I@dmjPO^c2WzvBUs2B8@ww?%()Pf z)_bxG*O8IMisVnbLr66gmce^0+BFPoBCp7$9kd+~dyXOE=cZ2ypSz9YB5Fi%j17n< zj;?pF^8-l|00DR5$VK=jmg~!*i6TbjMAc<3)DR4VY?}?T3P50_QE%~Z0>&g8knd7OFZ3FL$lZH1|g38!VLm)ixjjb<9}_cmWkk3Uc{G>Qf7(f* zPg|(HY|pt98sXVXKCg+(JaVMPf1N}z!Y)T)z6SW zsQ)uv9|YSuDA$8VH*^PTR~(tSEA)?LwgVuL7J#PiKbI4 z0f8o|noo7b;4YNa8qOJ@cwnX1ayB(Ko`yP?rgg9O#={6lMr3usO|=l*$n60%3(g0< z;su?Jifz=Y?`|$prfET6zMdPIBg~+{T!dsll_4|;p?6A@*85GN*D+$z9RLp_3OVrR zsS3=>%8E*)o-*ynzi!A+M0ViPQ&U2wCJAktDLOw1g65fYv+`KqHdjrWPu) za5_cZxJQFK5*Met@u=>_L3oBUL;Nht=P7$y z@C@lP82KtS90;-;+KpoS{(_yOAmQ!Xw=(uJb1aR1-c;@H|Mlysh1u2Vfj~p9!EN88 zYG)9dTR(eZDlP^|t~e6=OQ&J}Sx>neX>C0hp6s~tC}eK4zd~#M@Z7@}%3U)q6=VLJ zzjlS>Oi_lmJu5a0fSk93jLbA&4xPmLrr%Td>*bx^`yTmvuB0AXZh;)o>2_g__eg-l z&U$*xjI!T# zGLF$Fe&oAG`{2MIu8PeHmxmW=yr|IMo#B&zxp!rYHZS|=k>fXAR)3klC9fD_0MQ$S zacH(SQM~Abe+&FKcv*Ii@05dOlCcbx2N{DYn2R7UYJh1n>A~0nN%>A|4Lei3pZsKf z8vr=ZhI`T-`4*A;1AipE`q0E?{2#8~10Kuve;>cIvlE5vq!NlygzQR#kW$D>MiVL7 zWECPR%1E+PMu}2U8A(nL0N~giBgbve5!$&4O*x%j(=?$sWz@>){0KH@QojVy%iFv`z z)LmxKFYg)sq>u4Dz>aMA)5i_AyNxPm)zK5J^Xub)2r#rrMB8!+^oUfal14mgI{v!ty;g1pE6|@y{UdJlCHKrX1#HfHQaFW$V$^oly*j`DF3{I-NqLat; zAORtcQ>xWZ3gB8>f<}e#b;!UAf|So+Q$g^8RyM6n#?+vbBjWr+J6=PfRBBgG)VjG3 zfRWo3z4BUmRnk;H1*HW5x2RnYKfa7h^k|496%o%4*_Hq7zo@PW89eAzYiarK9nHJf z(rx7Y{Vcs_^wbo#=$@wM<$)95z8G-CoD&!n6nxX+EpJnOC=3NFbp=mEh8x|rCfn{G%r3BMe|K!9ubV1P>1-29$WL$2)oA2Y~2m65=hj+<7(V>1WH|Eb@pW9 z2{8{HsMi|oWcc^l5fQ0>xA9wW&{dxwuXH~D{Ir<){Y0R+nTul6q3J_M6V8|>4Z!34K?V9?;EnXZI&;tDUqmaH3%V`<(WqM_ ze*@_Q)pHs%UWE0AgF-IMU)z951?*Gd~N0%yJ5ePXfV;5y%9wz6Ymh@>g3` zNogr9igl6(Jr2!?u4g|CB`P>@1-mEd{nZnt8BYVtgl3C+_7@f-U&n~zB1wa*>S{PfrsKfBCVm4u6a?Q* z?3ER{Rk^8`$c;my-PSl+}C}r--d3o+>qtQqnq9L-dOfm zZM?kfw$1d1n|Tx#<_LI>dYB{%kRzAm!=XZX*>W9IM51HG1e4fB@gX)3_1s2c^>RIc 
zw%hp)=se|sd_Z{(Ls?)2!2~I$@Ki6O0AVi0?LUXfIeEM(aMpViVTMW+y=VkH@UWLy z;yN*~vTEbhW`6Y;rYs;nylrY~N~>8$xjZ!#OjLeYUSwm&=tkM$bD9sXy!~k}0IYyD zSUi6JBq1|IAm$Fzp6#a;76Rnr@mofLY-{|h!((CC`{JFE&`VG~35k^Kc;GokTrUKT z2_)%G1Q%a{pB=gHATHg*`%W_{l)=M9-X$s{vZ%Mi^fx}v`r~^HfQX8ch|`G#0!#Hq z6LKNJC_P{`2Yf^=y_=dF3YTW+ddOUuQhPbb8nrB@>*@1~(+nNI35yiMS4QtxdM&i3 z=y{G3iB4VoY4q-XL7p3L$=zp?hC z8AcUbz)m8XrQaSTDmY0h29Dq4KC6w9@|>(4d_#{ra% zh5{$;e6r-L|(2=N%Mc(|uM_4Ub zN6cPcOZxofeAqMJV4Tt(1~=?J0yy}A2D_j`&rP_+K? zi9lro0|SgM5E%y~ni*r%Z$nOhI$#8WpdZ?Tt*F9U2BW0a3Vqlhqd^ZQnolf;yB<

)2V=n3&f$%?`OKCQ zlj;e78~ryurZY^2M`ZC6DR)t0NKQJDGN~@l&qr6}zK%}P%JA@H4f`;PPv*0gD{dA7 zMF=XjOzQbe3=Mw#AcOUVX2Ns8H-@BzSTXO1@F1b>Nvii1(C`qH2@i|~8a{uQX&P_vjLhLhhO@H-oTM>4 ze?<0Ri|sJ@u~o^r-F6#q>JaaYl=s6>kp1?kx&VefWrHlkAGC=q_}&*F-T;Uaj(lJF z>(?#x2Ud7r6bh(MP{tW~VFVwjRZRvSgs6lB2omc@sy#$o(O{xd_qvRH8BN35Pc8lI zD-QbKNU!TO%jG@#?e#ZP$0t;lHyZSwUrP*M#jlB4it})~GOPUTMEczR(68P>QJKjq z$FrTN-FddRK73xO4uXTTB0u5Z0Ms2U7ldLMz#F>m#Zjp7d$b)kyGsb z^d^b6hjulS%kogzwWDanypiyzqcZX!-d#(287v%TL9?0Rm#&jGaPMCZd}A9o0>pe% zQ&SfQah`W#!`91e2!C%~nz3$FJ~Sjm_N&X^7<-s{CTdPjmQ7Ac?;`gF$0pYA8m0eY zWYaDYk^pcra-vYY;eN=EKB&iK5!|dK)HOIZxRjKYk-Kqb+=)1XlfwWzbDE$V{C|n) zkZ>VzGUd-Gkd`~`JINKjuOoVFt&rmTEkiNoS{p5zBViMGL^R+<)p+`Ci?-8}j<0SP zwcNR&l9P7IlXe91c8WQUN4hSkgqBfAk|#wdVmGDO{7VB?6l507{n2=0#cp3ZoNh=r zIoU1wsSW;#3rB!j;)L`^@FHOqf)}^3x54>fHx!_khJi9~ z2#bIcH>hE#kYp4C(g&lDV^6IEeoSz4Ae+C(aeb{YuKPXLZ8zEU(Aa8=Acc%dk!Ymu z^;&tqjg1&NnE{9qQ2TM=Av$0KTN&J_h@Os) z0wP?HoX(#Ui;?7~1c?Wq%JNQoz!zzu8?l?_EWJw4%81kUeI{(L94{(W?Ab6bo$>x# zU1MQ9ZKn7u98)C56a$TP&o5J`DJ}5k;Zi6FCKCh2wk2PZsQ17fBgtqY5e#Tz^4_28 z!hv<2oE2DSq|wA6nh^BCCZj`^3nYfz-dK7f@Nax=eHoveWYsQ6WBBGzz5~h=a(rNV zn$q4277b7{&cVx9ODz|eYP1NbO*Vp}iONmn+mkJZ&}@J-(9OMs+uo6vm*v19xPq@F z49{pxV*mo~94lLM_8y8At@u7Pr&^#@)q!R`-?wuKxQW;*DH8^;$*=hPD?hC!rqM{26AP6+vP=Gv1Q7ZR_A`ks8Mx^K~9N2^DM@e_Hc>T!D}wrlZ`IspTM zSOr46Hg#YpxkF{4>z2;&NM{921gB&0zyT{N@0Z&?)#e0Hn?&7kOalcfTvN1c_o#VQ z*cY?!Rv|xG10IAZUR+fUEQByoaqL)O!vsuOvOd9`5Tg30$H&HsQJY9(j78+C@bD(A zQQ{q5gkVm(ayQt!kcAECnuDB#*qI2~8SA2akvg%25y+fU zOo$ZNdHhbpd%u*`{f-0J6JVH@!ME~V=GdGMq~D5EyF`(V({#sLoDf~AM+q?1C^!1D7_qBtw6)Bk`WMg zKV-Hbm-Ja(ERbs};Dx`>tOSXM$YhBTIm-1IueE@X8oC6};;#_1CGn7igoO9!G2jsujhcTuPPYESW}W89Q}oop!nk57 z9cKEhpB3Mpr!V9_G)#$L2>-yu<|uZ;ZoKcC;rxZ2{QOk>wdnzChpZdf_M_%G@!;Z? 
z>f~=FJ`eVm#tUpoe|7k@{S zt6eT}NdQL`{JxRFBa&Iqg6bfe<`ndj8%b3d~bx@sva2W6vI@k>g%(P;h2l|q8knA52l|UdDHZouJ>^}_1WW}1f4gg--ur;T3d4e!VxB~Yoa0<#j=Xa zV(gbG9DOSk9Ji0zYqf%A%H7MnL3^7h6~i3Qmt1%kFLp0y-Ic7CbYi=IP~~;3f)L)M z+R|SG{QQ%HKfcl|WlD2cuIc~7(j|wqrsl2N&#r#Jbc3brgbWW3uYJ{pKwe^+E}%_g z@HIat{mbGcKH&Wu*-x`m3yVA!wso$GeH!YQ71p7kOb z_v)RTgAV9xtmh6?rZ(wezq?|ps*dUhRY^6%fjwW|@{L)cdrnOLZU2m><_WEhx4K&H zSdIG}_nN8vcCGcHxv_?Sd&}24rrSAy?swvKh%f`?VTLGA?honD$JGi zJdaa^p>}y~pxl0c`3qD+k(4to9FcLVD`lP@ag$z7Kbn$sF?{6@H*q1E)Ds(49Nf5D z(=y~y{j#-1e&dHLFPt)n)7bY{&n$@+9g#w~OKM4K#&&x0!AT28% zAzBJ_r60_XVJVH_P+WYwX)q(&)_%)%HyIZ~oMveumpXv_soH z%+q9Meg8&|6L*yY7`z`xGLJs}_0Ffou3%>S2ipYtbo+T{v1LzNd-_*)ho~NQsb70P zcm0as6+`uu5-FC4rhjwG$To1(4|KwXC4gq^{QRNlI8{?WC1OO!z11e9WKq!L=L(XY zit4))#WD8zRphWj^@@G6tr%$jUQ+{@>}Q|rR44lz1eD-3Xu{DCnq)tH!MmimxmU?C zRrR*^l6!oSgbgrpitPUTUgW|Zo@^t(adfn zEbSfNP(g(6j^zs4bFBv|O6I%7WycTRzi4{$(vGoR4FB$7*u|2Ec?{luU3a7u*vrqQ z)z?blWvGSd0a9_`(*>0VY+6Jv2|Hu}>q!oMtl|YmFdzwMo7#`l zxw37~xFUrZcGuKd3_j&_el$SY?AArGK>SHaXPM&X%A*Rjwu}aJa;Jd69LR9?3|p|Ki_OxW`AL1GJT&|p5r=e zyPgZ}cji279K1Bb?TQj`iIs|;_4z#Adr!yqjP7xV+p0gSI@{T}{1m2F9@j`Zxm<7* zyY(&ct{Wp<>xPVL42-`|?sR2OVzd7JyJ zZ<4z2?Yo5~HO8#|hNqk(_G*Uj)f9Wy^Ezlkl5I0baBc0e;OQ1uOY4<~3%=|Lsm@wS zbLkF4^uLwezc(ntRa~bz@5#jTbNFtx7dzc2gi_wbypxz@!#d&RWo_tmtFtg;-BLH*zhCN~pX>65jF-FhTJ(RC;l>Ot*TPrG{L@YA zhuiOZ^PQdzR9U7fwj$ArWtThe_s}y>`Gg~bYPVHFauTY1^a6zGqJ=h_5Z!{M~qqAdUK5W^K!$FzM$Y7 zIbHKnmM=`F$;)GhF#U(c1I2f zpEDB^;@5-3qxsbQrbi#bi7-&PX!AC@uNS7D)J>}`xXOXgSyCRM zCV?Z*@%Kypk7v6(&L-+$HUG~`;_>}J;g|mB2-{yEi9Qs6|+`CqRK`C2g(1utdEI1Hm_7IlzeGs)R!hfh-wu9u>}b} z#~-g-6Uye$f3?qJI7=^LmXC{L--@j(_NJM89he{UP8zMtI_uK03Px#k|GWCw4*CuKA&Dt(!u3Q38JxoH0}s zC)zYCn|rK&^5{&9MU9eW z1cPM8m!z2M{9)g8%NJYhcGpNVTctmxVr|{?7VD+|=!X@<3;aTn*%|^XzC6Nix)zcJ zA`V3LdHhkI*|RhJFmOC2Gv6gq>`Hf(faOi5T`G$Bv>3%^kp#Fc@(%VyyZo+oBvv)c0V00f?9k zLC|m!${&II(WjO}BNN(7nmveAWVt6NGnKsf{&&|p_$9rJ)4$A-Nzq?npcQnHsMve$x`(mA@aiU+Q8Z%Y=|XG4#|+*5P6@&71gy6f3_$ z8VV*^1V<_hPu+4|eIP(3A&fjcn_jHC 
z?Vqq&RPC@(RBhI+wa*(JpXeXDWwCx`(qYr}PbQz?*(o$tW-_aJAR(D=x>uWeGi@v< z$0jIbPe>lRYv-tREHyy8R^6Ktde5J`Y?zJ=BFALGkA#RN@2QjOL3%qE`z8EUZ4cpHGPA9}hw19!cC=*IX z*qRaN2F)9DCV<~}@u|3TQ+_t>6cVBU^zv^6Lx_PwT$AWk5I-0)#k7364}?i9 zBd!A@#Fspe(I|?DU4kPN18w}HG~Lfk-qDCsBxW)X>g!?ZRH z8RrCvz$n5T{8wTb2#yXb9<6J!A8r+SIK=!Z3M_^vhOho7(7 zEK91Khy6D0@<#D{uV|m-KKFyY-#h+7>_fwwJ%t{r*A+#qFspoeSbuT$4S$N@S}*G_%X^%5NP~TNh$4ZJHFWe_~AjZ1a72RSAx} zt!x{N$GS{WOr5^5e)FXty}c=dJ3M~N5?Gg{w4+-l(egLe?InCZ6Bspt*+eSJB<)am z1FrDsm;_|N9;`{|VI<)k0T>ijkb^K6+iPOAhY%6)3Eu$74b4wZNkLcyPYA*pc${^U zCz^oR$lBG@VCT`qI2Dx8AjDH~@DCCL7ij8CM63tbOI-hG`T(iXT+ti7NB6un4(hnv_hk4RP!c*L84ie|2HNvGwaTO8Va; z$=3X6Xd<-2CZRtklr_gfK1ep?XHO&S^84lH)I zH#Bnpej=5BUsxoUBY7W9^5-$T0xh{6%%}VHH@=OZoqbU;etGZ3qQ*D;8oULn8@DcW zZBX77`0?J z>@SH-AE&yiSfsGB^2-W^U15ABc*clD4D_{M9!_HTM&cPU3c@2U-{!UCL3B&F2HR$0 zZZ2I;Z?H!jO(s`(h)c<&tp!gifw2gwv4X`t;K~(}OouB;aeU#)=&Luvuj1piBRwT0 zg*QYO*@bBLwiSR98s+9;C_gpwjVbOxAl_-xfmaAr1l;X!l-c0*cOZ#w>_m{J$U+Z5 z8vl`LbXWb;nL~obEPv-Iwzjqrb3NY#TrSkvLY4r?Y}1bvo<$-tn}lxjDyQgpc-dbF zw_PmU`O($AvPadYoYT>f5*O2?AzHc6R-v2vK{=q%I=6N7`uyh~t)pyC8=2-!*D|QO zx}E62n%N9MdQ51PxTvM|E?I zsB;?U%cHby=A&<<<$d&g#ieO!dm>;ub$6z}ro*C@&Ze7d7Fr|HoP;>4UhJgBhmGl6 z*jPUxR2RJzU9V%!&Zv~~j#uuT;^e4qXzPrEdWQUxbaLeG#7NFd)z69-Hhz@G|F7J8 zFY@b%)0OQlMT7Svd3Wv#a;);&rzYxq;LJRqaJBT(BB!W618w|TO?x@b(o6dWILm5Q z^tRDago4)LJM0RQ;%9r|M$WEj;h1r&jL*}l2k3Kt zC-enL_>af>Rt`=xsBKwP(7oB#v}5zvcXwyBT%Y;}Y7M^6a~VF0)nC?~4<<*{dxcGT zP&rbO@fqCDv5;6`Bf?9Rk{F=mkiTo+@FRVa38oNcF`vowPb=>xT3wglPgwsP`xT$m zu0A_H_Ri;p9cjEdq>mSTh>S|t(sVv^TQT%Namfq2?JFXEq~!R~E>U8xJmWg3C6iRP zA+qa>qMXY0qt_jHq#1H2{f}7l{kX+h7xgON^pHtSh?er4ee|mBe2h8gy;F*mB~7Bi z_yNjHX4bG3U5C01MDAdalDbsn%r$8gQ3eGX{0omH@0`&z6X&}sqmu}Av zM(1qc2!1Ej52SuG34S1h3SyBsP!0pa(*&|dTPB5uiF)P;VXp$8p4D5P7D6`Z}#`bqoMJ*L6AW_}U(GA8+o`O!MuiSas+S=$o z6dLqk@~8|7eZs(MhKaJtz-I|5Fai}T2UHfy+tyYQt=KSRbz)zv?N6KRZ}YS4F+A$^ zvi4VE&0aE>iQbk{%Q?yRJGphsAa`n@i~60^%#tJ5_OCdnAG6v}_vf#7vx@j=AOIb(?uZHX78|*$k`XITU<(ByD+z 
zJ7CU1@I}Cn)o1T|g!nyXljR+JVx9VBa&%j||FPja+#$23A=+_Px2sgj#t(jr=ovEK z(rEKBK`+H7T5jhf?L#Ibbzfl!G(s)g=Egw@?{uF|FR$QL;ZyYT@{=wFO``SnStAz3 zvuCZj`d!vup67PSG>VZEq~y9yUCBRhW6J&jx$CBEGyii?Tac4`3`M#)F<`;rqvM&N+tK$;w>OiG& z>7ZD9EuEf{mQk6nwXO1Ol*sGLOz;x;A!XW;F*og})gV%nG}Yd_vpv3lNhX({#cTCt zi$3uUpY)kxEOz%Crar9rQ=WS=sL)S)SJh~g$E!hA(Zka2TN4LLPM`DOb7-8G-KIPL z&7q(qTEmj>=un%j3rkarb4k~oUo2(52Nstl?C8DF(^SE=UH<25I;$J8r=GO`jQk>} zDR;)dUo1d4#CvZl`=x=XFXh`6f4;6=#gPBBWoqRE8#*q*hKGR_@`YY(X@-Tu%EvB; zG3dx_e&_bu?f3)QMj7tR{Im42_p9c3;=B4oTGp2HguK@`&~sa0%SW*K-;?N>L5SZ2c@t-Sf-s%2@YuP1;vpX%l(L+r~d!(-Z-@nuUzHNHlUi)v{T% zSHvJ4$K!349QD&!(1D1%$AVv=agZm&z21-V~ z_UlVJ=%gB@-u4c?h-2LEBef^Sp5H|~MMy-EUwLqQqW@!lja*HxE0OQ2y9-Q2{S8HZ z7({k!aYXvm+y{`qV6r|q?hthXDu18=1#m_9(X=$ZzZ2YI;!cX+rg(#t0Vf*)5YYe^ z2YJ4JaWNDR{qVk+)t4oj*U93KAR{O0EmmX>16*D;SU`ISgliJwi>Yz|3Up*jv=-N( zqLsz~nRw3r_Q=c41;5eSIRrHlVM}0Or;1VlgySptM~JPWB{>W@=A}`2r#9ZW_(EG- z`%f-w3mi>gy2^;~|^=UwD|t76$G=j|>g>Nv`APwsoc^w^Sbt<0>r zGp%Ao!^g|ad-7d{)4G4UUDE5gT=1YR;HS$=|NMju zR5K(MD7Y#7WmC`&|MxJ6P?Q%NxIau5FYm7dxw(ZX9IK5ZX zs0p+t@`RxzdV78Lx#~yQY*K0wl}Rc5#z^6oxELQ9=?59r8val4i0kGU1*rkPEZQ|5 zt{$wq0$>YB`tCU!bqE%jQ)MS~ID_taAA7SjF9d!_65wc1uU#s%|^{p6k4**0uy8Y zQm?Yg(ya0ulPLj#vi3D$VZ}QIpSy~$U%#CzHsRK-V;aw61o;dVsi)2aSUJ+G+Vozf zIPU10<&6O0T6MtMo!Q@NJnY~(6}_Ta#$8=j8{)2BWZtp5xUJ#bzBBX{Rc>M4ttFZ) z)uj(hJ}=lE51ISacIlOAnCGjP3!koAjrQ8i7hvC6a??n^qERwyY^*)0rE5IvE581i z`^wi!#8QexPXHXU;+!#ol_#O6y!P@U1WwyWLJY*;jX_Gq6+>n7}OUoMA&XhvU=L_$W!Rf#3;9@+`V-PM|q2D1sP2hp2dwC22*ud;u z5slzAz}keXPpl~j#R47N(UG!TZ{mZqM*dVW(5`bJts+3Y?dv+SH7w7^YyzXeSqm}> z3`suWkkfCd#Ou1L?!}{(e!(`5M!UB>NMur^Q#7CcXc+5eTVy!eCCg$IyI=KQCZC6I ztlo!j^V@bnkCBD$2x1utQ2sNH=P!ItHz1@dG|qt-;8Rh++hl@C1u=QYCVgeM=o>Hu zPjN=kctbuV0IKjiV(bN1N62uAB9XYE5+XT~38S#qgpCDB#v3XTQp!nCArT z!XH(3C-`#U_uTQk3w;0pIKBIWgFyG7`=>?!RR%H-k=Xp}c>pdS(JBziJ~CVBm6Vi1 zdi(qpARdQ-1M|Wqj4HuUpoWZ~@mdG!O+xrknH$=A<;oSp$^eL1P83_XgGsn=pmIE} zE5ePngvgx$-0*Dzz?}o*KIGTK9)IP@q{=E_WvXXR9x;H#3 zZy9&1eldDyXS?B+C+Ds1nJEvwep&xe!}dg3`F4eZxM1b|EBDO{#P$z{Ej*4{T}emD 
z`dkpe^zE*O^>>BOCNT$GN;A~fOMCil$=$Eq>Mor2PL8s{j798uYr6wqP&D15*p&SK zwc9-R^!Hb=icQIW{JbzLntgTg^3RN4m7mx|w&fW9sEF{cuIk80f1OqI^4fgcbAgL* zIvnDvW?U{cXYmVMICEO|j8vR@*x<_Y=$Eg}zn{;#oVfgZ!r3DaomM$dHr%dyT=LFI z)6wtf?yQyOo-B@C2L=zWy-aQ6>BT74=3Xbl-yF{uReQ{p}*1`i_j=CT-+C=aK7^Sw7Tn+hkwR zWqqDG?~&i78bmWWqH9oNVD~7#G*iuTzTnl@45mlV`faDA8S5)J|6Z1T-%d9(*8RG( zdN7ujUtmkyJnuXIB>}XuHM5V4ET^1k_R%`eFZf*FsogEt#r8f14P|>mj_KT;g=m>s zzRqsnit*8yZGztB-xuBMpHqK7dQ-~W;34n6)`UGQ2^x+s%gE(jzs+;sH~vMz$%M)M zO1mD=eim>1Rr@|f&|un)O|oGjX1Fu?aiXWkRwL^hF-O3(IeTvx+ob&#t;8*!)$p3S zA$VO};+V(E*AEXpHsgnJAu*eW153)%u?0Zm#Cry_bA$fC50Gw1!xaDqR#0)$*#ct# zXLKB60V36Shnc}XDP0wisfe95HW?H&J_wHxP`c!e+D=$ZtW0QZIR&g@@a2l(0QN)hMWR1DarTKmMk8vVy5)e!34f7rGF%W@ zV|egQyes!_2|D?0&+a*i3`u^P;?`|86E!))KpHi$u)y6UfIKnBBpWRT z2>08|p>iV51DH{fL=i#>#&AR%Hi2X4NO}222`2(lcw)dt^gF0A3GeFbv$N>Skf_Ox zQ5`tc(OXZll5{{ntJs}h0u;g-gDb`x^J56$3#Lw>5bN&wvA1l*PKZna@D(*gB8b~K z?2pw6X#o^6Zqv;aFz10(Zi`oPwH&i_G-H1P*j6u^u^@)%+(jVL1zqzEh*8T!wf%sr znxiwG=})@8&*dzHamug_U`sb-L}W2GPkPch<7}XKc}-Fq7`k?U8-+kZ>{(wZJx#Qr z=j9Av{v99OUTIS87OmZ&v-rZmu9vEC+2+vu&ivP&i1}VC72&w~`*OdvKIiEA=p6kf zp_{5FMYbP_mt)|NmUZ=MzM-PBpVs%PhJ@5at76Y5uY3lXjWjDu)bfHgZQ@cZcpvC9 zr1<)k>V4%EicmdyW_9Nq`z7IHB4NBqU~n+?b<0glk+W=18@m(Jth}2@O^qpZk@_AXz<86}J8v zhvuf`Icb(*?%)ge0r>j_frEH-$*dOVEP;QCUK2;}E^s>cx^zYPfIn)1!k12vgY;7^ zdUbyniQAt=5+Q*_62C|FRORareA=3+Zx4uR*C?*~!TgIkeMF>uFiVL&y|OH zuBKtVF_;hjo@&+oN8Z*k9^yD?XHpaN)MdHYg`ws;>*|ZXdjTz{3CsimX2=K+>F06U7DjEd{MnyM;@nihvv;~LP3GoAyKZ4 zL(3@yJR;IL9KF0WDgeN)f~X`UB~{(6lx}E0;r9{ncr-$6hyn!HYz>Xdq){9Y*)X-{ zl@yF%h`Cf`-yH>~6!IHDs;+?4RRZTDj5W5|&P&7go74tq6^V->Szs!^TsCZmUWMX~ zVQFLwlY_Y<^mVYexOK=FI9pB8Ro=g}CreW6Fn8P1&O6I5o4(=5T=s2P)VAy7<-!#* zy5Xr*7nGIGk65g7Y2T;i9#CffLWm;wZSNPYfS^j#we&8Aq9TJo;D`6lQW`gv1YyD< zeDK+clP9sm@wjdY?fqPCL%oFEb zld$tTiW*_`9uNnB^91WIiZ>J$Kc?NrI+o#)B)V%jhgA*zhG+sK7zbH_6)Toeh_GKR zuuciKJtY`E{+j!8wTMk&^{OF0sFvTK-?dBr8`uQmo4Az!67${`uU`@2Z})@>^;>Zq z&D-qMYx=%T$6SjW9z}gHU4gHCXYOla@O6&z(Ua!wS%wdKx~1-ad6>aJZ;IwGV?(#d 
zMw=$th5HtJ!n8Dh%97oTjKQIPsftxF*hvV2MBamSSBl6KAC@)ps=sS;Zy||57#3WF z(d{R+;qOxtk(Chy`U1A=GQ4v#>S=xHc6g^z6y{LyizR{}|B=Ym^E8gPZRi~jXK$1k zvb~DEm6OrGZU*8#%Dvi0w_Uq(a(c5Itq<&Y zH`o3B?85mj%QQ7_25YZ3Z}8XD%*bUa*KCN5vSv9lkMv-Q|rYMJ+@GawR{rc5%!D*JT!(9#6%B8DH76@7o@ zVs|#-#ZeQW7m0J-?Y$ixdiS5Be4wY;;y4fPg&r!R$1p$pWPFio%407U524BTe$k8< zS&zN>^I-JTEE|*(>zwG4@H_7o!~OW#geupr0Ob!?^~KAsJ&j{;OlK_O+t*%wG|oHu zB*&IPTRthJ$|n^}JFvQ}J>a;3S@DC#8`E1=mX5%%&$xU7e!?h8#xTrV3r!HX-x^?s z5JepJC$6u`AL6-K)Cr7jDrf>ps4^7KrBMwAs zhVSs}L+R?DfLWIP>D+=f^@r||gfjR)kz5Isl^dtr@$lw`RQ_ooQ+o`Xi~A6>;jfG2 zLvo`}of=TlnyTJ0O02k{Wem_smT?F>kop!SCmEmfnAgI>7eUb8z|>YdsNjwnSLgkq-=>~0Z2sln^2E*$%94BV zDJamv7GLpLrDg(;DCLm{LFdIPkQR1`9zCjvvvJ4p{5 zuCr-q|2|C{93neJ49*XkJufLRlz3CnJCZg~mYpgI9o(Be!rJ<`##8Mdm!0UmBhDo0 zK3ta+wzhS6rs)4MQZ%^&<$S5~dy$*>4z!tZsK2eCkR_xxq6))~)BzZ=9J=Dr-XaKO zh?ge>FG0XvNhC&^*-J%Ox&{%lAc#Bgn%p)x4+0jL>DykF#eJ3ZV5Olm9^z{> zv@nn)M1<0?o6`g9DJ0;{7$UBiq=dVxVMhxDCJAjn>B)#AB(s20v*iVjh~Y0@J6OP3 zN740830vlTCaQCa^3>QB?#I1W2KCA} zspyS-iP+!WA3xqddF3fCw-{U@P#=_%p&JPfK!IwDL_Hz}!iBxd>`qk-Z3$6w5`G8t zm}S^qJHQ3O6}tN^bgM4773ZghjA2a6PXHzSqt5T&eW7*gh}2b4BSqK+C2VDc9|@-x zW%7j)E)Ir6F<6W^a^DVu_5?@loC+%uL%Fs%EHf-OgF^~?W<*Wmx|ar7w&Zm z(k$*5dt}?6^~CXnAd$kZ3IW-kNeknrzmL(+>lcCy%WCX>7b@%$N%9A`}YYxyZy+*}CD(UPeLUfke~u zptO0ppGKDRlOsnw<(JuKd3T1I)IH3Id-~zN2(H7E$&0E?$9I5uT%K_8Q+SrNd#A|W zq_=o#w)4jMtajM(opvYPNX5hx#bs+Yuea*z5mnkPH>7y&CtqtwO-ad%fw2oWz9(xx z;%;lEwXDmV++HPkC3MfhwFZ*K2gEGdZ~f1%hy&&U)C`2{ZF0+ObLJoR@vUO4J_^Ot zGeywEwF?AA8~(@t4N)pAG<23xu|2`&|1MwP8s*-n0kBP=n7n6{DZJ`-(bh*s>&-tk7@m55$2ZXK zXwtB&T60Di2lMHU5DS(#aZPrkOu1K5h6+(l9+{s8uq!+{DrqQwD6#$I(X3m0&;LCe z|Ni*p-c`OS8a=S*BeUu{C;h}F zw%@ds8H#CtD=WDwR_EPGuZ-}lx|?>U#OKpQYL}s*VN87B|Gg`Z$y>X}X077NZdKRS zODp5C;1D%{cj+?oWEs4Gq!r?uW)W?gr}V*9{8(n7MBw!4IxLQJQHQHyl}1}qCu#JW z*n;*Pv^s3f_$V{?;78+t>R-yjxt$|EMQ1m~gCIM0So&?eYnZZLSFr|E+;-iMvGj)J(NleE`L0Tt-R{9?R0L|4W8k(i_thAg3daRk$ld=b(U3;tDvZ8iCuz$jg6kdr|8T8seAp|+aG!e!4HeF?xows`RUh7 z{>Vovkp}m0ymHgyXr)C2nOs5h{rrgB>gk+=CP1`sya3KY|L5PNTg3 
zLm&33y4pODmzx&-e%&S~XJ;CUwRJSBDEL~`6j%*ZW{HW2AB)I1wQuJD#|AEmg_mtK717(#Fyo5m6!5nJzT6H`Ju^!G}~$k&Tg$ zg5jY-7?)tnuzD1@W86r*M+os0xTf+iZj}7R81V(b_JU|^e~-Uj{LMs40A$`0X#HA~ zSrn>Kgi7^|^S}%PbXD#UQ+O(q$_ammn25rQtHIoj=yK7fPDuQ*R{3}J#j9~C45eQU z6~8SNJM?qbnOp)?E=cbM$He~W_>G6KY^J3E9DQIih#KPBfy$EH*G_NaDMAdzzxVDb z;7K^$Q(19mM69<19TUY{dws`lL&e35qO5agl0}XsoEqJ& zg*fwwt~0OWEhD<3f*6-ngt6;-k$mL3q^18^XT-|*1U zAo{U|p9Tw4fSJH1Bph5ApdrX{4X{);O0BKz4A`C|N)BEk*c&XNw5_O?YEMWcQ!F_4 z5q}|A1>GDh{7+g)ZbYTJYoM}nCN0&51acRQWnE?@e1MGJgTVu-B8V*(su%84?M1Fz zDJ195C@m~VVx7EJs9=q#&@n4+W_z;+R_N-CGOnH~S!_wyw!(mVs&!v!og2@kocy(B zV|js3m=#%WoUW+v4-ZcVqO)q8hxgsd#mTb)jr~gsN3l=@?pn_KtUMMKo2)V2aPh;i zd&0tO^TUOymnPI_T#`qdT?ZQ_;g?B6HZ+1_5YJ?6VVB^HO~}UN;$l4>$u?i6^1FX6 zF%!W~gvClvaYkq*={GQ#CSrAKv=|W7!I9k?@ z1DeXR=}bImvvzJ#iel(Fh}7SF@~#8@MXeLz_@J;D(0Gw zXs{`Wa|o%n8J1}5LA1C|c*NLkqm^7`NYoSXw(sZ~n)6Q|+jC}uRs|mSv8>0INW_FG2cBX>gr}l^+3#30~3H^j^QimNQ@qt1cT*~1Xvc%T4n!~(j*jA zCT8DGFBopYo;@1Y)Ica0gtpmUf>wx7`-T)rhxM%OG86WoO$25JS_hec1@g*|6gno? zEyCz0RE@q4{Vx~r&N4X7odY{Quo5OS=iE=Wkm>-mw$?f z(tq353u(|LIm8VLUBlpO0fb`_n|Iv5M4(s18p%+c3*p(!*jk9^g1(*qOV{9dy9{R* z&+T#EPpEtz{WfDPYW*B)x@LJ8<8r#s7u5i$O||KiwzJ6`+_HxA_M?)F$zXKyiMrui ze_Yz|&|qlm0d|@P)`N`B->Me2C91Bkez#9jT<6^fv$8(nV`N0@0qhsXkI-CR!JsEv zt{n*~nAnU>PHMnXj;QR3fCGOMn+dV6{HPd)O$lg`50;-5WI50-@e>FfmTi%9eQa(i zsi|wXZe2+pOCbj0S&YpO8LPz4u_Nwcn=~vgw(wXcY_aeyX(A3x;d8?2^GLl5!GlA5 zcYAneXehAHeUQR?(H3%@QJ52bbIkJp_gEo6rxl#~13Kysrez1eD=65GeyiSA=^s$+ zYH+Z$q7lyXk|T)9DRBWk1*_QtYLO#3mb9=?UJe~SdQ5ISB^uyxU3I#EY#GrL_Bq|o zN;mSJ&~;(EWALcv*I%w-^m8+t+eU4<^BU!^#wdB&xMIUKed?YL_}X_h+g8!g8N zE{R(61`rKA_fpZ0Ze}-q;S7ZjV=%O6y;@TiwkEycc9hkVnhS^IA7y^uQk1g^C&V-L zoB6s)T;burqS)cy@$BZhk6aupl!n%gWk(jjHTzw&v^2Y|eF;+nz1^i~&BCUqcrUKm z{Alr4tfZdD7pH?)b4Gi3b-#^%(71m7r@$e$z;EPTPc_OU(vekWygf?tevfSy~jAe?y7+VRqG^Gtji5`jaY#x{p}T+rHoIW zrk`OuzJ8;?1N-<_PQvbS4{S}<)Zaq2c*Z3~BVv~2OowGeabI|Vlug`YC9bI%%$%gX zTPZeb_D^|v_m2y#MF~R;;6SYL1NMzH`A^-RBnJ*)cs5K52`=$_oOq259mf2MQVg8! 
z+M!+XSnn`;Yb|nJiH)G?`c4dcVTk8SK5%PXS1KN;NrUN>v4|K-TUyMpxSsl_&~RYj!khP~p*Vc0C^SlZf^!6BFw7I?qvN8YVUzAG2boug1d3*rgE4zvbieZrT4 z>qQD%KpEn0U^{}(vw5nJNKHP27FnTZWp=oJ-gGyuS}}~;Sf(an z8G|-zpw+#>QAO;J<^hHN9*0W}g#x(Sw0j=cniO%weF*2l$?RcRS~ZPYg`sLE+1CEp zW!u~*_viJJ#EJi2#>ce&=)m*biI}`bHq@wC*K56ySm~Cb0lUqmgObV}X=!|&O5$Gl zq^B47YwzDt4%1BT(%eU<*l^C z8a4@>si?XF5QV3BafBJ81Tw27)B-$9MC0`!CB<_5dntJaK%CMaa*{(hGxlXk#{|q8 z5>Z2wNX8WqWMV!K2US`)TWilr0h!08Cu5p%lwkLv|0Pm2%!1%qGn?8dgJMwv;YWD% zh*23KE0GEeVWDIY5itqR+5p%bTH!AfLM)DXI-zGP35F)^Ie#g#Ne;Ib(Pp*wPhqG| z;*$uwo)El%u=`!Vt{c&}?jH>C{>0(2x25QTK`kOUKk+t%?r|j$DV>Ix#~4j+M|d)+ zfUtu`&i&JA6QeJfG>4Q;8i?N{#18cEwP-ZR6c6GzsfhxuSuiQsSFJLN%_IxL=>KEw zzvFsd|NjB}Wkp+4g(M;^DO8#gk(72Sm5gYJ_7-VrSxKl=(w-D)YZ47fDQRh^A=>J9 zznpVE=e)na|9-dM>z}uC-bY@ZbX3M&1*cUS z18$`r6=9086jmmA3zs)?`{BWw%YDM}QEg%3 zn-6$@vuZnEv4d&Li}`a!@r}PuN=^RmXjh||Hxo&^NW0aHsc6J#mDZBGQFnd+Qi#me zIHRE2J+zEWdb{JPUpuCE0bD!z_HpuuN#qM*^z%@Xvw6d0ej+iM0QE6Lqk; zq~jDDeA$(yc@t!pMmUgor#~M{X=a`5W_nuJA3TSaw!H*T7g$8ijW~!~J>MkoJW;%0 zYX{jy?2!b>B?1`i#+$=*7hmM2AU_cSgFqv>=n=3^8mHiCUXDLj0SrmRqX2ozvgOOuPX3Z^2uwvc{kl-_wD6_z zi(H=hQOnoGue8be;gxWp*SW!K+PVTJ##O6Fp9~gH+1(@49Uh8p)9Y3)o90fsc|YD; zt;^`}_S!GU&CQ;&8?2u_GM3CbyyD@gk5Izsh3!7iG>y(n=8828t+Xn5L|~K~MsL3~ zAGz`1qAfB>PKk=Jtgv&N>t3~9ztZTh3=V~hJnBO1;RBKcI07Ch++Q9 zHiEBv3t4%0KkO=tf6*N>@kCrdRA=jX4giuZd8)LNduN8`Ty?7F$D{e&A+90dRKWa0eBljCJ=Cg1RK7 zr60-`7n~T{L-LKaTo>J&Tlwy`l+3A{R5aWxi~1!KRdI*tU@)YzH*&u`9>gnG`fq=hY;e1lCmgRH=79y<5_8kSa$_ zbm(nFn)>CZ1{uzjDKU-BHHqsTd+qg9cyey>HCg*83B9h~mhPN%i&m}tjOqL@R;BG{ zHfwDuIkMv*CW5(UyF-@pZimZxg=CUm4^1AKo3+?DUOL7&;Sp_d-YDL=SiemmUOOn_ zq{lmk*F_(z*~-@E%djjv_4nh92&2U6$WT&(3SPe(0AUH3*o%`_>#|t^+YrYAxVe|8 zXhRqzK@%;*NoNH6xtddTw@nl3vP-pDm>T2_^j>&|%Otx43J;j$Yg(eW+P2UbFnQ_$ zSHlsDC;bDej@=8xp0mwDY-OUE7X-@jT;&(8oD%aCc<~%dN9(NgZ-gt8q zQ7?uTj4Hz(fQeEs}Lm0l@<~1uws$G6fFUFmIaffq`VX(l((-B!e)9J8ENC=;|ll>>k*YeN_{hgBl`6#yG7BAS4et8weMv$0XLQ=`f z)vFEjiTWnn>+K)GPr_Le*i^T2|Jw24U#yQjZUzv^OY@O)-)~n_olSCBvhQ4Y!IDtA 
z^y{h`{l|TldJk*%i|lO(nCw_`TV(0B&$*-U_M$&q6yB5PYC1Qg;>t{>#eeopa2gck z3@`H>8e46%deQa&P!hH}$Ps@DcCz*o3pvIwQWt<7eG3XDNy;eczkzlV;-!-FwGfVt ze>l2xKh`yHe^_WGFLhE#LDH)Q{_8auf51yB6(<%%3t>yVgXaY?n5%Y zJAwZYJsOkF@~}TLA@!P5yk7oM)NU7_b_39)bk5(Ia88StUJ%sSUa)}o58|NC7u95y z;MH0oDInh2FLa=o$NYKHqLr|BY6JD*-P_9}k`B6tSU-_G+x>+nf#)05!nfOB0#$jh zEktL0FXytK3O}vTm^@NB8+7Z4(#+F`jN3*%z3f7>fA%nH?s2x%8~rglaz>R59OT3H z29Dl8!~Wb*3eqAj#LWN3S0@>skZxZB?@k1h6!Hs#w~+vGk|c>;_V*(MT#`x8AdMX; zdMPxs6bgD}4k06A0=FjynY?@1h9-YyOsE!9BRP&FvXgiM0v$mMFoAo+IkF{8<(RKq zVL^eYU7spBmBGs0yjt$+7m;WZW(`gnj3MG(h{T-0gk)dG^hBU8oXCJI5R+w{?6lDd z<90KYFs`H!GrqO7`%LDWX*v=u!N#WZDIQa_#x7YwsG{gR45;9_(jNR?9BGlcaM{u{ZB%azg*WrA{?2-q^;N9hoBTY7CbI zk66ZA9{BY1ZosYtWgXR)SqeCj-!tvy8^~(OKAl9h+NIaU6Q=(^pV+s8KvXgcr?^n2 z>V?q$;_8xq1fWnT(H>I~a?k=oE>em`k+f_Rk{=fAPd2z6oT!{&JFGeN@XRivLq1= z27OW@K=eCAJBWw82SNU zr(}AwqW07=ycrttj*|vK@Uu~!1fi0A=qXg$?}t$Z@??{Ni67UZ zU4A(IT4|eqisqUZ4m4!<26%xXXDvw-itYXK-lec8G4p92@Ip#SdwctNLkv97O(PN% z!YhqgX97W_{+b`XIUvn0U9^X+&S9TSraw#|qb+VCoGULjFC&soKwv!{^EyVE%9z9i zu$5*so7lR@LHeSlMru7cvZ#q@{1rSBI`N{N5Wtq){5epu~G2yDA~8&UbG1iN0g)k$nGcG)|En(^C~Q9cB}f-#coH%(>5)z1;%P1v=jo zgf{7IBl16MJ3!y_FK4N*J-(tU0Rf;=6-kyFcKt-I{>1#nv!xLI>8O~SiD%AQWH-!! 
z-E=Ok`%1$W7Ks@=aO2cFyYY+r_8O~IVmhpNkiY_@cJQ%SUXW{1`lAT0t>&K(397x3 zKbriI<|5ak3C-$=>TS~IbCPeyqx(i*-nqZxn1N=@{7IMFKH`jG1i3HRFcVjlJ$d-TM>QRnuoapV^zGnCveHOi3)Iaplz~`rpgQ7SDBL{*+?x?pf;gCZy z6&s=cMl}GH#qLv5X;buO*Rgfqgteu>kEW@D>a=_moOU!YMITnH(;PnW!nyU?b z3OESr=EGZ=5hdnd;UmFPZ(%3@<-113YP(M(C2nzZou!5O_azHf_ocUAXpks9>dJsr zrf*ZX`JLZ6b$F~JdpCr;$bU8Igh1zmO&b~^ z18H{|wQ1y}2h0=RLqa-08cLW+VnfAA{rra~0iTV;*3B|Xn7aw-=Wx;%&#jqI=RVcYZ`1X?u6V~Y zJJw(Gt}Y)ea&8`0`Pvz{V=ZFaq3$e4q#plx>9L>GZ%=aN|9wW&PCkv3+I76B3PQ`p zJ}MSO9(udOCs5Pqf}+iy_HV~894nTLWmyTB z`i5VUg^}wcpHH4s1IBGkwKR?^(%vmM>eX*)TbJ?O|DbDaamORfAFq#yRL*~wu@)<* z`&LAWh|5uRGRZ67Th{v5TmaRn71%UMWCa1(pk`zYO&Em}aK6&hc-=uQNO!ZDR9m+ z_%DwY>4Kk7qbNW`|4!~=6dD@{TTk&qeRwfe%?ddZrFf!wr=8q^>>(y284p|U`=e3p zvYxnas&!f8DWO8#)%L%BzHW=Y5{EC^3tw?On+AXDHjviAUX4UZ} znkaAW_l-_MuOYB;&BzP1&+!rpwiZfCzk8$e!jgK``-1dGOF}VG#exY0L=(}o?i^bk z;e7wW{NTwnwGxubvhm^7kUbk3cGNFZ;gmmUcw&`{fM|xYPC?H6vg}EZolCZSoN6?? zwmo;B3+(%JGmdS1Vo{l0>Qi&O`{SEGq^|zWgH(S44bO=E|4&LXAScW4)46|~v~|Ox zgM%k#9u7jXF)=Ygp+u_|{gLA;LOSIr3gD$LqQ4lJFX?#wFjkDgnvhczQW_BQ89rZ13ATM4m4N!C zc90q^Ty;WuA-L#kpq;Dfi2#2ZA zrg2h2xMgp==sJiVr^3|0X#Rake8}i}%n) zXYq~?gWQKACGD1M>W-TQVEQc67(CP?Wv~pIgzlC?d;65$>yAy|H=jM8t7)Oz#L!_s zS^BMG6PJD0Nzc7_5vhqh$iQ0NuJ?#;QlG?6U4umC(*U=ed&Bg|Y)sq~pI!bTyQ0{T zf4$At))u;2`TMd}4ZEft7Ew#0IDJID&cov)R%KJQuQKN5{Ry>MI$w)C?-zT9%H5nr zelhzMmI9et4Q8};w4fP_Akze2Ls97re%PO+ZFtp*KAH3YfkN-beo7$NU2aoWcdCf@ zA3|+XeGb?i6W+iJG%S#21e|fGZmypZVnCV+f=yOo+Ns3bCQpEw^G(TYTbZfJ7>OMm zQU!wl;g8%FdKp8H#~OpU1|Gchkn<_PZccd&EhcnHFJ`NVwI!nOgz(xsS6UdHqAl2E zFTRe`cbAAMmK!;t+dq!;!@J)i7J+a{#Hw^%{FC`5O5>LM%SoY_CqX5@+(z{VpM0Q( zSh`A9>4!ldAIV6zF!Li0hJ4Hc|3L-?tJ(L28;hATflJ-wO&DfoSvb7(<;IK30; zZ;WL~TOx|zzjEXVgb>u%yK$#Y(j~*S%=(A>V@i1F0*bV9x9<$KW7WcOi}b`*^20>Fk8{myD!f>`U0(+?DSl_y~4x0G-GgbD96@J zg6*KdDr(7r^MD;1&T{Phevry;!~%XzErBHEjYlsnti}=M5Y%b3kxMXR{2o4u+%1y% zNz(rqxr0d9C1$gVKlP1P%72NiD0Xm8dqWO)Dxs8daf|KG@0u=-k8m5!1-npOb&2Q4<9+YlhmvD9&E`=z^q4S7$k9c9@Ak6$x&{w%}T zCvol_2ZHs!@LQb^!2#9`CkVt_7HV{o4nLxQ00k5b42BET1D=0nFzuO8T+wmu9!RPh 
zL*AUR=j>UmEeHiC846fbD~K%xL;-ugv0)~PW}#)_m6l$gm6b)(CUzl!m86gWsaC?c zL_bwm{7%=CkSiIt|DN{}SK{9PjAW&K3%tUnhW09|Z1$rJSVV*YKO|B2{rIsFNGO(5 zFix9^etmvHi%d{ZXzihKPj0dxnk``VxaK+4zhb*tXo~!;UjthSs)R@9FNN=YQZ~JF z-uO726+jxJ4AeD{!4!fZZ8l3a7`NWP+W6Lj=<~xup~%#(I3(M6vfrAT9K*X@T90V7 z9SErjT{_9|@`R3b!N=p7*H znr?RM@K#X9?DC^TGX#zTO&JH4CN2II$eq>ge1=_*pv;TIM4OHm1Xzft54}4B& za~Y5d-GCCM8>m|)Yy>0^)BAV4W>Tp&f)W3Gax*eb4L~DUpg4}SuRtY2)kb`G@Bl;B zzxv@`>(}{XMV6T!cx5PFK&`lfKp_xhgw#_88y_IiuaGH`(o6oZx0=R+1PzG&q4o8Y zdY#2p%G>QX-_)Bo+#lE+OQSBpbjnIxLPoENfl^;$7@%>?X!DNIS0!xz5Mek=&}Svx zJMlrbTI(tZ)N#9eyGBmDnUQ=;4ifiUT3PnAtFGQ>Q)zyB)I{J;%<<$F>v|;pO^w>!En_ z`l9v-S%aXyeQs11!=&qXDqJjEo%b39x)|P#?k~ zir7*GqzaEW>6z?LX&Rb70ofOpRr36v?(mpKy$%x$9ZxnjTI8S_{xyWE+Ob?m%|0e- za3EQesII_Phqt->{p45Yek(k*3l)0cs?U#lzRJDi={G6Jz6Mp{GIzBXBS6cR57+#ja^L#l)|pT1v2n7 zo#S{1XUzLAJZa^p>cq1+oqL-(++U6{i3vS_;q=-c(&6aeRqHDOu;8fkUe$#2N_1fjW+~lZPVm zVDqg==)TNbnSFAW=q29AM@CcQNu@6gOB{6MCUkR*lWRSnwQobMq!tGw_j3O~JqQ40TS1JmTgW?6M!d)XX z7-2n;*h55*5T)I~2&S-YUgA?rT3T*ejAfhCo<0&~gg8}s!HY%88Zf7$7jnm=$Vl9;t9X~;cZ4CE72Z$c7`=7x z9u;o6$C~Fzk&Q2;4vdEbiemb)aJQ`NMyOV)P6H9CchdZuACG-|7S&AWKc#ZQXm%Yh z-~o~FIA~!>C1C^9k9X|Iz743>6o0hjdo!?7LXaiHQzRzXC zmgM5HnV)|RF+f@tApsr5FrEGVez-I|d-kxwFH`||8*+@R06XP>agbnboPBFEpaZIn;TIR*2|z_c?DpD;5O zjShMDyWgeiVQk#(Y-uiZTTHn4o6Fh$u`!$7eSLktaEd?cV}$FJ_$yH^M}(dvGzQy( zqMF(t-P^MelH)^yXD|aQVtT*=4^b`;`})NPR}fB0w?T3(X1`00j=mtFpbn*{P}M|3 z!`B0z^AHm0@!%YXGn&+KVCHGsIqV6n!27Y~h5JJT2KS#o{}Sy3D2S>D779GedtS;c zEljc}p)>K{Pj#ym=~k)958p|SgNoIf!c;g)iUM_l{h?-({XCCn}@tuuR-e~UL6i>6SiQjnOnH~LV)nTLf>fX@7&&Rbp zEzl1D_b&olQ}gd%9}7vkv`3^SRsHjw?;3B*?>EK?47mu-9$LltiiWF1Zvjs`v{X%rGt-2WUo)AVf}t>bT^N z?MQqhDeAzR$1$UrVQnPfOVMoKdWx6Y^w@9I8@d6gBN07#|L082fo9@D5^%b_q9THA z2Kr=(b#~_oZ`pF7I5bbhq==4k&Oz=UJxunVSrfk|3&H;R^-3;3iu?2*F?tZ&?=C|| zQdIG5;nH_C-fWl8TpK;-q!9wpG?xWX%If9a>w1RH`dMN%Z?%UryKI?tqtyj=+xKo= zcY4I`qH|IV?cLdXIeID;_dhqS{y$Gv6RRsXL~h0w-)=yI!ZG!P_aI+yXLp>Cch>*g z{E`=z9CzDR`*7yt@SThEhIFxa7Cnp$QED-L<%++HY>MKJoCWi1MMFjv-<(4Z`DGqZ 
zzjQJz_!YP9ow}n3W%ZhV9PjiMSpQ3gZ6?-R;@jEdZWmam%=$1SEW7V{*(6Q(K->=lQhqK{pAeBKVWR=yY~zxP0y4 z5#Yx{Oo2Ip6R~l;0zNm0>9x1L#?QKNi<*+Qmw}*ZMV``^Bky)`IO(W}zTDrdf5Rw_ zEpt{yjOwC)Ww#dfwz4Riq&24>=5;sUTE@}#?mZ7egn8Ldq29!5BexeWRxE;KKHA@U zob1ivAp31VyKLE{t8T@ou4U-C2s@?jDiEQ6{lVzd6_GDkY(W*-ZePiT z$5WEkHl!eMkEG-`M-w#_6)_GRA@?E%0y%o;>zQ^VU>MiYc{HzI#a<3YS#R!a?;+xP zz#Cpyq;K`ft7Aohz}jlJ39x&hgv8hmV%WUCOB@6m(db~V#8T0e@aY5Q3ml6(5$v;v zqpr5L5*2F1=!wbup+h0|KR^6_Vy=KNJMhH6cOn?NJW412WTK_r)NLJZ)rq4o%mxV(6Wz5^X98PxwCtL*?om=!q z5=0|;Iz!toAkul+t{oP4<_{~Cc+J($sM|R?OWk+!;QL2jvEmQo1BIrUL*ui!2hZrB z5h?+3^%vq>8QK^antf^Z@?I!z%aA9~8T(uoi8v6^i+_{9)cT^Vg_`Y6OAoV_X9MFzOtBCAK#A(h6xCn z3ETh#pNawj{ub=Aw{BtLqTiDeLNxt?x~V0^wvKbG%=8yKqQwK;e;E~?1VFOQyPW8b zLlO=SO1Q&jr%z8nEJ>ujWPm}st#)_YKlUvth%JmWUh|Dw zE;dm=qhVQFQf55F{MqTI3Hw=8F{$rViZmu+a{f=tw z^-f8AZLFOFRo9G*F5O+fsPYnREax_9v2m^0I*F{%PLNazMn)Tq-M;UE8c?zL-7Et5 zNv#yP>u$J-OQB3Tj`~q-j2)?bjkKqsav&B75L6{tYDjq=c7|9e&SplQ)rn$aB+NPJ zEiozij1+SS7*Wg{TsgV8Cm{r~jvjbJDnYO8A__dX=y8fg#IOXmQ)}C=#Fb1$=Op3^ z#$3$&zPKly&{+T($i7njkJ&eDdzL;Ovct7gbm?)Dn+rSPFTj###K6eN2rtIuF2b2C z{}YB1DOVG6xKny|Zwmab6jIlqpLg46gjL_=bE}8on&qENR4NBs=)5mO+0<;9SJqMS zlev+tv_d&zW>H(FR_>jEee~X@?-S<)8CRlC$wSUC-{y^NszXO|&hn7o2}D=CQH z;=H~xdYfTfS=IceuD5rD@Z)(z&S_~xQjl5FfQF}x{8@#~Iq&PllbZ6nJR?-k^{P~V z`2UyG4e_q~>pm@d@-(X0$x~k|VKml?hr5iY`>h79UFE+Twc3LhU;N56msLNTRX6=@ ze^3!^LdKO~vBB3DMx_+3ww4`lEuL%<3-PzQfkX`3Jr(>TJCNjH5qM+$R6*p;V7>&F zOOgVG^&VVqg5!O(+|FN{i#7;u5#Wwcx(@nb;##yU;+g}_T>Ni8=e zDRkpGhd+?D;+>sU`Q28$kJ!wDEV^|?9B_GRdS(6|M3t0suPu`zBl~hI`H>TvMqk^1 zko?kzpaL9~o_4p$mK}eBBk`3eCuRBY~I0Y`&4ILB)najTR23N(avc;(ypH_UwiV>l7r>*8t5>~K$oLhi$@lWF_>d4J-kk}Nsku>-H`$>#V9Qo^e zg|xJ;TFbPo1hjnCm~L|E{lhDRE_NHlyef+C9U!wOReETpZ_U%PsC1QGUoCqA-tU}^ zgYvU1N_&U1^x=uOcB9S-!Q1z(W1^8^jtpTlXBp=YK3de4eD&8!(TP=z-n$|x?v9;I zYG3LyXJ#={vub7?6rp4JhjzzKiVf=EMCx_f$!~v5(E}?Y)Va&GU)t%fC)bRH%T-XL zVxr?|884G49Agi3a-Ny#^woSC$Q(6e#jJO*Le=X_a$~;MTbA$24iIG3 zlv}0M%Q7I~SNQJdX=$yrGB_|_{*|$(ikkN>A61Xw;Am)BRUERWJVjWhlfPeUr|d&K@5-)fqtpI86ht71Zt12b 
z*~(?w(ni_I5(asvFRd{5&Jqgtnx2v=e?Ma-s6%9u|9-TyWK(eKKGAsSva4=ny7m5& z_F+OT1YI2R&0S>pB$9CW8c%0^`|h9?_f00;uGf?=aK(8jQCyoHUiWn^yZ`YM?M27i zKX%@&K7`bTS{YGe2BS1Kq6}JenxV#*^ER;jvN1dr2wPX#lrg-Oaiz`{Aq+O(UA^4i zNa-g3KTo*XUqS<>xm2M)FP_;>mL3}lH(M`8JzIie_?oVmd+x9Z;J=bYC`d)E|`@C?H zExEpt@zQL-U~f=#N^<#+#U|1DisZ!a>HKI0v3mH|YcI-Iz8+o$9(K;v@^`YIBj1O* zgIfnNNvSlmMjD?F^YOkL>GSSS7ND=(^z_2`RJuMMp)_sIe;*Y5N~1#K7QDl6tp|T) z?^FJxy$DM|G5LKrBOizG+i;Et@ihPbwn25ahpEykxHN#Hhc9W_<8;3blWw{dp&luS z<+66-=gZ!UcmBZoSDkUDJ9np8%+2>|Jm5mjrfJuk8Aa(q%~=hcO&ZJA{gR>g9&x(! z@&3#CWk?J7|GqtmByp-Y_owTL$iFYmF>Or-Sd=L_x&&l`<$Fyqy`Nmqhdndmlv>|9husbZ~j7L(OXbdw~LB2f2rcyxhJH zh58ER6OxWQ+i&PZ+`FAt5qn2~?#tR>VLh$Kdm3KWmKk5e@RNDrzWl3ZN3p!!dzd1& zm*|8v8@}qvVINKG6B2JVE;|-NS=Vz};0l@jN674-R5~-Oe8T@&gMN|#*(?72wCUmN zbQ~Kn`L~fN@tv%SR*zrf1cs+=+aJ#GH+=0VRng(B!C8FBotG*#urhh_2vyfdnGoAu z*gURUm#qL;O8bPFN;W_U3^CeInD4Z?n%QgVy=*)3vu%z_>X}mP*hUKTVcElY%`%7<%rn8~=WBAn;=3 zyC=6K+8Vk>#?-|UdDU5!uyg`XVJ6|2N(G8||LMh>{%M=KKb&7>Rrnz2>iXC$gm7xw z1sS>uY~?NZ@{SFQlfr=A#mqYsgLe{zpc$4KvkXZYUqTdLyE$dfI^qfNL9=rVBuwV!W0L6Afruy7*$G>YGMv?t@omv{78Sr@h zpgto8G($icFKyQ|FX6Xmc9Gd2Ho1 zJ?>#{wUNmST^rW}HO;yYE1kKZ8L$SuT1ZpLsrLYts&BWMNgM++>|Ey^!UWHpA0Be$ z7tJu&;eX;=*Bfxcpgh7^F0C4;9wHquLZLU|w5HqvH)E1L4TXU7aM}w|%TGe2p^#9F z5GxAnCO|9%Suk<<9NxO%qN4%B9?3-pd6{H?>cW5TWwy-!xvgW=he>YXTP53j?12D$ zb~TFr#3ojCR z{~rF4#2c<=akv?^}n&^ z?Nff98k%aV&nc^3O&@;f;d$lmO2tM^FsrTeb|(wB*16QW6kRyaJQ6wP4PNfGDL*pp zCf`eJzI8a$x^1uD=6mYduZh`jBsU^@w8^srs>L8ixfu#tsK~dOtVf0m zaN*aGOG2t&3TD}$+m*_D8BipWUl4;tJ(#JC3WpXpJ;rxN>MjK;tl^)M|J^|~j#0a{ zivAqr2Jn9CC|gA8Ot%8NChZ3JWOm!iwCz}HY&$LU-RRw3v$?T19L0SC4BObGA{)h& z0_yy(_cWGvo##$X?xP79aJ+8u?L2d$z3Yf(E>raA`x1%O&u%_K2E)!g-40)_>lcnX zDyoTyZmzo|eEj_06|Nq$#gA6e+4Q?>y>}U5+$0x-P>TJ$m)+^%*bt|or6v9kN(nil zLNm~vln|xp>3Ml|8xakVdIFgGgAFfkU&1wt1Hq&gdP0c(3z9$A(9xBXYXj?t`_Bet zlAkeRr=(GiG(i&Iz=-CGO?`FIq2!i=lYo%9Rd}o)XYg?>{09WWx(lbjmWkKNc*Ar} z@6_(F4vYnb)hQeKw2*!r4Vb(#+q;e=r2ffukzZP1AuS5V#@xiVNz$Q_4M4(>kzow+>#y1UosUw#^*3iDv 
zA}-Z&J%i!Qsm45a{i7GuK9#74^zU2UJ<4IT=GuJjgYB_D0)O7>$x~Y<`S3#doVW{L zloO>>=>hll#$N**>6Inh3})Ppv>(6F-j#6swRo%wkVyVA-SIhFsxGOTT+fGHCd=Z-@Pin$?Dlkpgq)%gI`4{ADH}c8G)Hi>S^C z`kD~sN<)@?T<#j9GON8kp!7N9++Q~KxE69lXY(3b6$*uAo2mlwBLD&Eh7M;1CDrf+ z1L;*fR~HrBZ0tr)83#+%6XVDI8rK$K)K8e(4_K>iV?m#G7!Y*Q5;7?#0h08Pc=s6u|Okf0H^*y5wNU@MX zu2lfpmJy2YcYxFo5+GPRHWzV!ruah!K0n*;)m?u9!j~*4^T7Aw~0s<5Zf(j~yBH4Nu&>stUh>6mK@PPBvKfwJ~fHQdH z$p}-GKcG!1S@f{|`*F}aM(@_zvOGfT5aaXJV(WxL4<2@YQZtkivPWi>G!}#!A?XL+ z*EdvI-~CE#LddfqROssaiC}X^p2VKaB_1d5)>A zKSZzbJU(%gd8Y&8>mKU`rbLlhDRzyOQ`_0iYSVY8N=L4aY#92c@=ZW2;CT1twabj& z@7ZsYp5HMa8_UZd#wY!uRq@urgU!-`J$7XR5LG8?a}ORmL#4ezWsZK4dE|ohM>{PoaGV340T-X?IG#a~q5g=efa)n{l*o3cC@~-+kdM zX&qZfAz=`h9GnI`r4YTfLB@G46l$5FX-J__iT}W@FKhd44YplBKx8}sl93@?U0v;k zs7=}FFWz8a!dfhT_syF(BKFx0Rtqe-4T!k*n=e8v`p=QiELb6?asU?BXPC(A3$c&1 z=G4edS7w?6&$qR+yWD)ziFC~~bR4Dd6@|l-CH%{60P6v+0V_NEy?y@^Xf|~30r%AN zQweoLBy2cOb;*~@Pq@uPM^hExTH489_J;(E4s$_^@S5U~ z8?Q6_dA81mI+ zoz%_or=Tqhd3~f8vN=zN;im|1No)|hekogMI zE>|wjRNkAGnJrbc`<$xc8BVV4-R`HU8`5vOed8{TLr&$)Pnz0Jyb^+Z6XB;jc11mU zL`Q*E%!#8IPqyqh=!Xg0G^Vv{Df`!MPz;i|{ps`bt1j_johP zW!7amjNxL6AgX2f=RU6B%C4#Rr6(SK7@ZI9(obo+fqA+IJfY*h7f>Nmyue+ZMyL<* z@(?~B_DXzb6c}_vyyxiYSF-~Qw?)pYkMqb${rBVGm*697{j$)l=`<#3BY_WkAx9!t zoMOM~%Hcnp5vbCb~d@4+NFUuMt91r*0LnFUJo+UB_$Nb!0k?;Y83gV(Wgo$0$| z!I`m;-3h1Emz8urHCUKCyn8W2S+p!l$sy>_jtacf_b=;-w)(L-y<6X+WN1-euc`cE zeTD#Btgn?BZ1Q^Aj@{8I?=?D|*8Wu%@i2%w^?h?DJ2V(IJBVu3FLcfE5Zbb31#tUv zEKT1__fv=)kkn_BDtDp~B10!3Yp@i)>}@5=BP^-{M)S}}m}cn{BExTjtk{@_!E_4! 
z`U3kuh|!fM?h`&`Q@@}(lyn$kg(_r3bg6Xprt4@CfSQ6dNsvAE>(}P87BZ`17bp1{ ztgNhe^>@n37REX?CV@KPaGM$_1^;i{tq)4*U0?S!CnCYF@syb5eJ$Jz9TD6fc9K<7 zg>x95HdMmj)}v`jJXEhhr8ngNBhvfx?B;mE{`BXuJvA5LfAJyz4gb1bF;|OU3lDEn z6?knvFLp`h{qdnk#tmPc)9ol+t0g}(-}fK7%VWdIZ8IENM8797VaM;Zx*;u>>Q9EE zFO%52T9e0)8?8IMa@j38Tcg@NG1cyc`SGQ^-74wS8b!8;`Pc@24pI^26}~)#Z4?UW zG;d0&SkfoSVeTFt%P7QUi3jEeOv&f*2lbELPS3KS>#t$yh-y?YRDM@B_G&Q5NTdpMwHG26n2E$2=cZdogF?DGDS9RRblC!=W#orI5~l3wDF^XN z?waFH(SC%M`m9H-))wVScgTc z3P%JagSaFNZ^AAPc`ca+YexSE^me(|%KT>sO)h1KRR?W8aK)?uCSQlWxl$VDFd95a{Xw3hV`-w`{ZiM0Xo?&tvCy`@n9tQ0B8)2j#fDc3dr_lJiumBgaj7Zk$9u`d z%4%QZ5)VZy`4MD&!)ehJtSmWDk^vc4uj9u84i=|S;vmWujH^gRuoRi`k`*&JQbFIO zXSRqq^O3eHMy}n7$h;Pkih~8-W1}xzL@^xzO`Tj z#D5Zcis_Ehq2D+4PcpfQba=X5(%SUX@<~#5E2UE*ld%WvTe93Z2!9Up^VTTb&%9-||`VkD_nDWNOIL?oPRDro%HsVUi*nXb~g0jA!Q z!z&IbeRDmyO;F?v=g}a{jE+VEB_oj+T5LCO@?D?1HKA>LIF0|0umg^bllDaoH=ekQ z&v0&Sc7~@<@sL}6s(cZAOBpW`en&(xucBfeI*$!*a5}9z-CNf1@rSgBLC}MB_lW4@ zEcT3Pn*DX>ZK}8OEwXUBQI=6CN0jz!M;qGyVo#O1709@xQq^#m*Ox``FyEcWMHvO< z59_T8R}K2Gzxep?>_vX19XVw1XLx-}=C8N*Tl3b`g!Xs#88z0Qlf(V{etXY5cBbOU zcNPB8EaQVks^2(Oa(`;8tdjX!$!K{T!3?lxR)f4R8iAbQ!Dl zKN)`pfa!wWCE_tlC5b*Fw;Ww-CaSt+Z;l!PBYW;jnfHEyHU78W9)Z>%B ze_ZQ`V4(<8L)yxuv(Bl(^2S30uXoS~O z{eLKr#Hpm=Q|jxTuFF{xOJ8*o7Iq=2NA!VByS{%;xJos>H{Ga3i}m10p{9RU@yUu5 zL#@kF&EM>ei+#CLoh4~9elhG%Jfxen2aO55ACyDPrB|7m^(!dNzCF5Oj3fQf;|F!> z*DlHCUdXz8#n5q6JnN|kw>z5eE*Jmt^UK%BfkP6DR_!z5{})?9ZctQdV_8(x%U^TB zmLJ*46HdMES@6v0arSo-KC*5~E~n3nTgNL>Ej=CDb@>4EHXHQZ(e)c{b_)3U`UBG~ z=Zq_|O7lAe3fP%$rR=8GHnVua6J$LcYC6{Km7vONYRZ!KP%woP9#&P|1&>`E7i}K8 z$?x2uSh(@PGhVN9cNQ8tIuGX0EuOi+j6m`J=fhon2jp3@_3*D`gScGNqXN9Pm!z8g zGZ@d5p1(}z=6K_HPbs?`8v}>Y+r4HD@(~Du5Oh!rH06sL+u&b&YltRozmj$s!g$+6 zcRr`lIn~jm$Qq#p>?g8zkKw`clb>QNpKz(1nCQ1u(h!>X;ws*D+559WpNx-o;^nJ5 z({uwzMy#I1$L|*jKOnO6bKbMvivrZD?9^hk88*El%U|9;t>7WQ>Z5JlQqb)t3x}DV zP25crEe}+;;rA@#Upz6nm5hUsdcm~3hC&7ol1Yq$BMH6#!W`O#GEyVdCEV3UnKJB~ zyKaf(rwiYdI{ULk;3WRicx6Utj_JBP@X*dZymrXCtYEx4vWSglUHrF)<4vVPRx%G6 
zEq6#IaL7wV4YT^o(HB@6c?I|noD&Q1;@0?jw)V4K{8~%3-mbd9eRStj_lFCwsEi7nvlN>%^_A`)tXdanccEgL zUVvY*oPYD^I+K2j=plEjc+DNa6jIzb>Pfmy32|RpxJ=BG3U9`=uuD(C8u%EYa~D%? z{fC)N+~;E3A}+|+5JQ`=-S_V|KutK@^sv^Z1zX(2XIh%flGnnSaR9Ibr6gdt zM^e8rqf(e$!dd^fK25Jev;_{L$BXFV8pFQ6&l3_$@_H^K{(~-L=LSvcwLJ-5ibHuB zZ0tuTrhbZXc6c`LZTi*X`j;R~GikE+^`0Ye9T0aEoafh(?*mLuQB{==4;BQ|BwP*3 z@*d#ml{jl4G_7y+AAi$d>++-RezNipL^nh)W#_%kza;k4#d0B7oJnfHz=S?ey9AS3 zz2xz$S=PVNT6W@mc6ZfjshNrMd+oRN3(II+cbTz1ZQf}vu_x|e`H)uL%#`Wg>|g8&b_HjaS3aLAJDv9XvBsx`QEjBYTsBXn z3AYQD3^NvcbmB~QZ1tInCL@-i3%$qNp9|-wJK$r*1imIkOiRRWA|)#V02*OSR%lxS z@JL>mFdwse0BqRG;jrTVnJqkj$RJfG#+LdT)<%aF0>3`HG z8s9rZGW(C3q~>gTwn_~1Q_G47%u**`c{-$aC6TtSI6!S8{ak|XMf^ALN|ePF8<#$8 zYrJ{aMr4;VMKm#hJ>}5Bo+a0nTaLef`dUfbX*UCw>}G`^%l^={HvM&1@?2>#N8Qpi z^{sf@*&n={hFXr3BZSH`#PDTwt*B}1*c-ff7oq#OSbm;$OWt&)+@JWSHO3xN6q zODRE56mcw(L@Y!Qmtn-N@kk4mU*tf^CNUBdvn~pEfgR$}ZCXz*IDl{HpeB_(<*GyU(gCo>p$_k{@QiETl7L zuh!-&;4Ut+d~%Fdxkn+sQA$b4Ay}LDv8BhJ2P^rJTUR z>sv~W4v8){_bL|TYzd3h2(WmJ;a?F4IbX=%}aw9kQt#Es)LM0(W8c?uB54i3WP=AM^+Cn_q0{8W+f)1WrMC6>Y6 z9fS-qxEWW#sflyqSL;=W|2q&F#XJQ%$1#`c8N{}>Y4f%~|1pNMJ({@q(eo{qR0 zk&?Hqph$RCX!@&bFKc|%`gc??ciq; z)?Mj3Dih~v!x}HzaqbhBYl#fGRlKzmUtBF?eR^2e_oA4YQ)fJW`v36d5kj~OqG}OqxlW6 z(h>s@Y)A6&6w zg-yoc|4h}fG|7XQXno~U81Jl7a`37U==O_~l8oqkNuU2h@ml}BRSFlY9XRSF^fO1! zGu+!$clGN?fu(_0IIDWjbkz!{8AXw^)u?9V{l>hE$v{f+qgZwP@}Nq?Xsz&^^nM?g zY2Q;8R?P1{4fl{^i}H@r9_!!jdK;L+NT4}M)X;fViO`j0B$Cz1^~+vQJZy%>DIlJZ z7hZuox`|M{)K01Lt(e1Dj^%C~`d;hcSD4m_Wo0$;&XYf*RV0`JpvIc>m2D6?(2pGSss)Bbxu%s@nImJa`r9r%zb$V zmk<98*z4*1O-(+r*NRzH=^G7QZMX>rudjs(+{T|DEFY$iJ~DI0WZP~PYVMb1%^np@ z8aeq$7xC)8lq8-z0f(WXaShU8!0`%`3K4j{9s@uo<&m)BO%P5Q%9OOHDa4*` z(b~Z>9v!kQam;W*{=Bd*Q&u>g^LYPC_w1dG7h1G7vyJOp%BON}44(MDg1}SBRJ>kB z-WBa0ZE;IAlwWo}> zF^y}_Q+AK*l?dFJnT%zio$lL`RpO$o9zdb9?(;U^w~|M@EP;+Ub|_Zd;Fgi8Z>g0} zr=UWd>AL#$^p>OE?s2O&jlFpu8bJ9vBhL2Kmim>lOMZrA?7l<1w-m5^LXHl>|Dd%) z6w}IO=bq)!q~yz@Rg8(a<-c56`c1?@^{9)$K+R-RFhNCLw-N9Fm^}EWe<5ME%l^x! 
zS#f^;YCZ=&T}QR|gT_iZ%ExBsJ;XIquSO15_cVPh9+P5re9qULa_dS*W6S@CtnUEF zdVl|ah>}f6HjzC`viFEeWMvDH5!o{1Hz5g`*&`xZnI$vIChhzrX8T zr*mDWb9$c7=RNNGweHe~hWs3biyxU<*U}H(lxrk{iorgxBG9O)rPh(o-Q2%_*KB}& z@>|aNO!9AKRPQT&Q_opW4oru8(|DQ7b;-2vZy&#=pfJLj*9>j)Hc`z*>+YrZI12W0^E>?iRqTJ zz5hnNwH{U9Ik*iFhA|>H$wzhX&1)S}|E`IBS&D$AQTF#Tni`x|htf_q2vj&T>4y{+ z*J-h2i}i3}cl_XRVBlxC(t{KBes@l(GVE#K{UNOKQ>m{6F}R1hZ;Qs;GM-LRY(WaJn=Kk)T+I?(d4T%aPWbk8^Y z#+EkZ%py$QFsdD$N1~9emOBAO6^m1J5_7TU^ZQ@b28T@|l9;)k5^J|8a|SChSNPpX z^jTeuQXt(>S|{C!a`PeT9W~ha-pQIvkus%qLSy<2sNQ@)+fwte21tM?M(HL^+uh~C zLolB5nd2;W44tUv`+Z1v!C*i~(YR0rGpCMV;NtWjnu7qYgRBs(NB`!s!p_frFueD; zfJi>0tZf*h!Fv)9o5AKoI&( zk8%@97rf6-)atwf=C3PN!+SVBKGy5tt%-dstoh4jYl+cLMy?YX*kNHdLcvvTxNSd`@KC<}rI4BV`2$AgO?EKtoc|q+ZCYH3z47 zSaftpNqLk02d}^0Q{l~$5(M!D1Hh45Pr#HNQGd_=PPt}aqi*C!ddakSRev(~{cAqn zemF&6%+(d<&+CV946-N$19kK5jY+cWGF_2UZtPTa;)vmGV@)19x)`fbV^#k`y8qyYt9TowcjlCZ%BeAaekNr*_`YDB$AB+KI!1wL4e zcbEj8Enf($Aw(x1bZy;Sbxn!hQ>Y7(q;&X96s*;{P9I+aC0pB1D_K(X>F?dUr@7R- z;DiWQ*e4|+EB`xep;{OKuo7C{k~48vN3sSc0>6nK^URd)Uz$sG4L^6km>&*W-Fr0B zQIBHX*NVCxvy;V#5@`0mD$nXP<6-U!^COR((u%h#TrX*EZJB+=MfY~==dCOZa70jxI_3I)Tr3C6IIz8@(COjf<2$*3_`7x-Y5fmQ8LG}g z;>0)P|HZFhPUddYu&Dh?(T2~G%u|Bx;!d!`7Z&f{$ZYx9&SF7f{5IR?N2nASSR)O z&qaNR(xJO>EqTD;hS31Q_8L*p_k8yzwb*&cuo++Dj3}h?26+n-YPu)?t*X$<@i;*8 zD)j3dbrxu<&|M;|r<8R99%M{BH~Hf)J^@3bG{OE?8|62Kx;b{D_CnzJHDrSmDOo&F z+uBGS7qjvC?)x6MpUWpo-JG@4v($2FD%XpS^jzdm$XM%bcVKs(zCxivAD@sA1ONjf zJuW@}dnv471Kf1`jG0H3ePjPCt8t058=jE7*Z^alK}mtba`GfIo?KqwO}G$Kge>v1 zb8@n@xR9n9QbJhR*}Fa<3UYkRWp5X>>RQjJvay43Gkzt6A4y_2T?jYSd&IWX%_dWT z$Nz}&SdiIEU#3RU7MKxI_4?3D-@)hchflBJE3JAN6MlUg0&KfSsjf=+CXiHfvD>Jw zE!sG@<8MrAAz)DS$yu{i?8pnLLvFb`fpfK%hCO?U$?05oxNaSfYm1;!G! 
zfREV7sTAS^k%&|9FCe?9e+eCg&H+0edowfeRIQM`K;a>qaU<*tg&00!uuN ztP0(@ z#kFF8bxkY-3uTX&UfP1ZTFO;Quo_Yn^-6V+G!Ui@5qCj)5=7bH!xawxhT0;z;?keB z`e79%<<}OIC|KuH9`+yIxtr~qf`1X4^0F!}j>q{;!vdLoj=T{=UNW)~mu!yrda7ja zE2G!=I!=O6>&A2tz>lM5hE`9nZFRD?FKv8N(b9l{DiAqb`lDJ)|NG2{=`)7>7$8a@ zdu#*p>$KcG|5{243`O||-GNK27~{T#*A53J^FUiY*%?bOzPPiG21kMk-W;a#p|HqH z8B#{V!J4JFST4pciLk!^5`F$$;#xzH-qUhAIFcQq9$x{m#<${J5YRAvZ$Jkli!uN& zO;Rbr9if$I^YG{lPP2L9gfp4yKxG@T=SDpMVwxM~^Frnp z>5dXtVsZQ5=Q3HM)=+EuSwdeO>G9Hn;hPX4c^CunI%1MHY1LI!9KF870pa>g5@z)w z)e`LKfVRC2%NyfQ@`g5{P(-R05OyxqUDECaeWCkNhEJ;LxN7PvWXXD79=YKe6zbKQ zvoSI!ZcF5KEi}AeyEfy5l|$3x-%4jeQP*@CfvwhBW*ss z^CvQHwCDT1T&Ci4clE3qRr_So&3kXwx^7%5N#C^dj;pRfdClS5#V`DnBBU#;bT4|_ z@lr)L=H%BaC@PNsO2frKW&t(^W~3Yi`7bsG6rre12VLOJqQ42B?kmH?{~xR(J}{@P z0C+X$icjc3U?4WeU%%ANYihFo+1!=ex0~8LAaMoRhu#6qh}7wDz@R|`C|Cqd90YMs zNwwn^^@m^`AD_ph$ExR%B;b|GhRtzds-2Ncqe42aRR1o6!dI`<@j(R1u3t?0sBBxh zOhI8@Aw?-rhQGzk?wjSwt!Nz)?RifMO)LkOZKW%IU2%rj0w~M7K4hM*Fy|Wl1OhvL zP=|%UxKT_q=<`t!1DrGFz)?!o>qT%s4E6ZI2r~EDZ#&z#>8&DzcX~pU}+#TGxNpqk8;AL>G!#m9a>(u zjqfigNJUXC$;Oj9aHfAC^=E$R!<2x>7W15xm8J4kl-I<{hu585)qC&F4ZJriO4DwB zzUA8xM3(WfafZwgXG8ZsU2M&+`z_0=Nvk&OjSScDXZP%%#u{~3nC?j*_7UTt?iw&Q~}(o@=d&$p{y_o81Kmx=Tr=-!wD%`x&WCf^W^sDb=4XiF((KQgY=F8;1$>(Tw}Y0hYvxl3LXXpQ}(Xr-Iu3O z{GOQEW?oYEE?W9Z{Jr%C@xjJuVZB|)GuL+rmEu0{@?NVk;gb1S>w{??Cp-jRk)w@8 z7P^-KA=p~cF^T3dscm}$Dy;ZV^KE!q)tv#VJ`w_S#zHM-KD$K{Jm1-eE^3pF_$cL& zfxtA+ZLM!$pbKI=VpCG68^GH341}M;x+;HNLRzOsl72t=oPHcB(94DaS%N0M+@gm0 zgl!;5G&@6s;w(ZdFKn@#l z*(l5&d5++&B+A@x(?qICcoiEHw&l!F-yvcPiqL68G&K01pZISh)xWWqrHX(7u=r$y zUv-Z^r{Wxk`&>%t&?%U&ZGj}pP%`q}I-tamI1&tC5$IW=FAc?QxVqFAoQwC*w16p+ zez71o_gy%9x(ozuA%qv=6DeQ6n1=7J86u^YXHp(*K2EEmB^MsAQIt9A( zgW_pJVp_rcp-R9C&016C^4$8KQ4*}@nr*Rh-9pU-bRV@1*FpGWknKacOjG1WEuK`| zRphc%ZlG1o7jM(oRKUP0(pYNnjQ`m4G>f<-JPbgB$o*gjx^02f3#5uX-uRnVBE0y74+%J_2y z4IE5p#m>%-lMy!r&OqoF3CtNK%oz*=X#XVWMfn>xu)&?hh>H3?2Z)kE0!q+j&O*}; zV`tfj>CX8B0CSLv0_K@lz~m&qwQ|a6mpFz1YQ&R0=Q}QzHpC$l5W7ayCen8DmDE)4 
zhleM76!*WpeMmls%6#Q}HQZ`2QcBDYl!+ePbZ-}bxCxP*id@c*kT3v0T*HibZu|bX z`k~FPFFqn#3R;z@t?KUWm4}iN1%J&iEumr=Q54`wLZeq9Ne&s7rGM-kL$f!)`&nCW z_t*1asVVt%z!MoMqXq{>n}WvHuy*M=r0pS;Tvd{Rl++(Biww}5@|IHqeT=l)3&sy5 zbgyTf093lup}@(kS}fxjuwmmdSV&s;`oW~%G;()I)Aa^;Tp7J?dW`q$nODyqgYv6* zwSkfxq5K)KOK-Y_@L%q4Yhtb0KXNrOW3aP6%{Wi{X#Z_G;kOjLchR4UHgr_>TZoE> zetzFphyKPKnwwWPks0PNTYD4q+H&C21T`{5led9^0}U+_(fzyA(K9%3i}X zYl{_-4_>Sp&=NTC+3J13!qNSOJKK*7o~uUuS|z)an3D|+n0hKibN(d-tI0b2gI4h4 zc?E^CG0|26!Q1$u6>~*idJaG5u@Wa!MsIu6H9l`k>A>&XKZzTf0H6kC{5K;=fCiAR zB2?1B1wu)&&{W5lsOkClXHeMnw|X^E*D_!LIuAN2A8O>{(%0A5=9QnMlQTvbNqGOeG;_M&w9e zNM!)H!2NPsiJGjK99Z$GP^g>beVI`Wh+5p`+B#hukS@f6?^p z1*)#cR?r#(bQOv!jABht#!G;z^R#!!yYvv6?ib5S7Z(5l%m=5~yZo3mLRTej5Nt&4 z)8#ziI$1oYW{zM%x#jfqM1XMt!U*b` zYcRkTCUDb&eyAstb_6e*t{^4&Z28`z zE^AoSz01QK^oh{IPK{P6YP82HJ72bTBJ{2XJ)VoL^vwd;Li4j0bAD2_GI$FAB2nxkwwtGTH7pG}MmPAgySD4r( zi31~r8ML!Lf@iSx^M<1-$+ctQSgxzb`RnM*rWrUu2=fT6w97OMp7CrzitqWI^|x4t;?&Xb#qfx$){GDz<{6Xo>+lr9QlAdu|l#+xvE1gLvg&BQ?d-I zHg9ts?W`g3zwiQW?-9cDUry1f@U*_VKB4-2al~>4R67jRF;-)2@2^!4O5Hv?6n5hQ zWIALz7S5B4=%{Gzg81R%8u?!vEzCv&skic2StN=BltTf#PX4)9D`AFp7{9PCk1Azz zt^bU~R_}slTRPAqeIJfT`ljk?TYaM{uj-_)}MKQTBRVaxfg(n*}(qE5?CHp=?QyDK($Kdvw)5~QX z$eUt#cdjPd4j<+t@Hq=h5ZPow&ZnA|0HgylIfHA@dE|*g-ir}~jh-1oWc?g{*dJ8+ zNb#|Lgs#_N^A359<+myX=P7aS{?O7awD|ZDpzk&I2$rUEalx=<^g7-j@2o1=&J;12 zHcVY2*h`wNY$3zb62Cdyw4}<6ZdNLoVE&&ETGt03^hEgP2zj@~^7g1`3KfKubn5daUAtfRSk!&Eh&kpeYvhDPoM7^X)t^GRd7-syx)x#Qdpqq36$b z(QZyZ_X5P~f14QosVZ2!HJ#tLV)S+tD59LMzJsvYYrr%kGzZ{ZNdFCir)dr&+Emok z1waOP%jz1j~EA)r~jhx&o^MT0s4#_1D)_9Oh<>Y3)mA#@kA&wXm<$Zok_$elGcj zBeisTv2uErg_Q=hHTPNH7=%a>ogp?qUKs_aC3?0Qa0@g{mNT%%L`!XsOQ*s&D)atN zJPZpYB^3(*%lYcJ#siB7MU{L^y) zQQ|m0K~)lFsa$zWX!@3c4lrv2uK51t#aq+zz`ZPd3Eb9B)W@(IY9%TC*p{cRQ2b|Sct+hnsLLz8v_1S*L7xai)KM}5fSv~`wgE~U zEGTR36_fMDI5__#4UGGQ<@McIE^lt{4_Kxw1ui46ulSq>1;Pl4S;cO;m)twLuO5m~ z_$q{Iw)8y9FnnHlt#k2iKY|Ev0i{QtwYYkpAi8OYq-gxh{^q?Fsp}QkAj5u_AUkW# zxVF&wAC@buF#iBVGWb83`PDRpnHM^~bMCpz2G`W->wPjXq%Dc(s^*&WiT&KI7d&Uo 
zH)rjT5qMG4Ern>Pd&8~(Gc~41P}IIFtDbr8W9^;`!=53h3w5#@uEdBGM(}6Zq!8rE z7m7(bhLQlPV)}%_FISEtK%3oJFwfRB(Z@f#%xj035fvk%t(3eZNdva`2YpAky~V{G4bXJOsHod#Ol2lx<#J7~-{FM)ax3jEc>5tMF8+;?!cmyGlUWEN4y?;{}aJn5Zw#*0){l9uar=P?SV~944JpAC5Q# z=f1F=@XEp%@KKzLI*)@DTeI0l|FIL!p(>fThiT|kxa8{AypkKEt+qixef-SGhQ5LB z@1iiE-?}4)R!Ze=P7Q*<_pf{4v_5r$B4C~dxj7>uA_QDlv>G7X1)x4h_dj)&)}fcM zCm?4Al7p*5#noXFHlTO2U;ykyFgvq+$hn87;lD#;1|iRhTEG;u<0V;bHy#MC)@ByN4C~`rBg>F|#r2Cttf< zzc7(zCM7!wA;Yg_3oQQzXf>&5yPxyP|6wd+IuKetjtzxD5%8^WFeoL5uhHb- zQBZ0d{3cU~lB-oH6bIRy3vZr_(+guCGwkT~%YCOAvrT*r8jiVw_}$Ix8Lw>KzCY8f z7IN#;cjg4Vy^14i8;WCJS8Ph5LMg19dOLL_q5iPh1zTzBdn4Oek##Q&m|Fq?Ls~9} z8#nnzd*p+JERn>IZkOg^Xvr#Hs7d#qdt{ogwqf604qtKempCUES1V{tk77V5U=jYM zx?*>AOgsu&ql#fPT!1i9H5hh~T=`ginFo4G+4zM3W`kPRsC?yfd((MMO>!9e=gO}p z++E}%2^z02(J*;Fr04j`U<4(7A#aoZEwtwZ$KnQ{2kQ^Q9ZuQg2T z5WwQr-rl~OgvX>gvZ;Llg|qlr5YH)J+41mH!@odv`*MWD5ajzH|FO^BxYfU=L5Ww$ zc?~vF(8IRR^lZZw2q_l8Nh$$)7w$Y!<3NIeJ(|+C58_(-n71lfmop(3?A7Wn5U`I* z)=}y;nMUo%=x->Ht$z33YhU1|iEAK4$mCjICj{TX{}ZJbz@!NMRLdTDwt4DJ5c1eH z>Va?lx~!_iTTzVB$q`bVtYOcQA_!zT?REHMbBU%jA9~$vINrUF9UXDfuX3NjrJ@F$}%m3M#;qUpV4Cr%st< zcjaMo7fuo&29Le^;DRjvAiNAf^Q?g&(7dt9cjkbZD*z3}#@=2?Mh4@P%F6hR3=%Mb z!LpoRTukyb1`iPe-=L@i>G#*9Dmd(3f7l1j4RX@L{4)b%SXg0UTS`T+M$>8Va&YRM zvn4+@@D+1gwuQ23@l#oRyvF>KDlRxH-ux=PLYAod7y#FPh8M?hu;7*fi9NR~I6Yv( zEd)-9a8>enrkqJT@s_TywQk6hqf|rNmr?YT3%(50Xeu@;ueOzt^bpoeI0Pg?I+ll< z5zE%iedU?aNL+xwK%4uHW?Pmzd+YHJaSbpb&NR~2yRW-_b=RX%#0zYOY?p(#PX8G& za&zc#_9lTAK|`FFtzS2VV{x2JUJQjFxr4e^3U+YOD8?ryyU1LecM zAQpmaJM8--!tX-AT<5RnHj5M*-y%KD)IB%K!T&td^g{Y>fMnd458|f{8NNP>5uPg5 z$Q!p(6d3hGj8 z%3>!2B3OUI+5p%QS#1vrwh}}iy0^wxpkqUq*$yW}$}?s%#l_Y22czlzL9v>FJx16k zzOu%eeRF3ycs^trp6DXL#DQ`JvI9`awGvG7M)OmU1LS5|{~%=iJdBwX^E-@NyL5aB zEqB-RcRQS=ADL@@ee>&K;@z;=bno`2j;}fEzBt;AcqxX@(o4qTY`HQ@@JDe>UyBn6 zTJFev|F*94P+XcEkQx@&DNo(ox@Ba;#ZA4Vg;6hDKI3qm*!KZk^`oCLOshoI4xF-; znz$M-O)DbwHXZnMzCQ@c^{JebO(RSbD{_--kW1?yJ^uxfhG2cSGeOL6$+yO_WW5cmbF^ESock!9W0;nxsj=5Ecd`;g~lA6=_RzbI!fL6Y-1XbyQiT 
z?h8yYwEKLBtimfiK6}enOH0Lrm5L{}^is&u53J)Ji*qME^_{b$ef?IkbtuqK0YDUBeD3%BeyR#XZt+? z6Ih}UmigpYvzIB>QTD6rdylQ0nvOy;&z%{&U~LNI(fe4h|MeFSEc{|MVz%RE&{0zu zt1;$Nq@Ylv#bTQp=-&E1=#q$tNSW-YQ|Y92yQ)L(_V1q&6)@9)*b-<6XxffofPcrr zlo>a!?9f3n1%&TG#Q9{~12j}`cb9itV$#FK<*C~Gp@V&TW5w&uP$yM4VXTcQiA6q} zsNdVQVqqJC&-tGUJ1g`Q`zDeiJ|*Ae@2mK6mpJHlSl1E1ypxr$G~Ykd!l1T!1eBzj z$xQnk+++7gDPOx@N61qhZD3J09-Z2VJVly=tUC0~$xlm{g7FxEX<9NgP(#U}un1PynnjpmB#GzYrL9h?h@F@mqeyjFEwi z6#7n}H0K<@E1gO>3bX@11lE9?YUMQqp}^0>&7H?q+DC*+#CEP4XxXT_TcKdWM=;nA zLLlTpK``_VFcZL5ACx4YI*t~)Tf6QfXWjU5PZcMblSJvCDEv806fISqiypC@ruvZ( z?h|Bgng`OmIe;`spJ{`9C0>gKF{C_i9+)B3_#?@*P%vmt>s|dM@~ntti%7?3=kqSa z`Mm?yb|Xp6QTJ)hw}_KA;c8voTrvxU^Z0D)%YcUU@rBi2eVMmJ z8OaQgjtiJZl%r_&^B)bBr2!IJE7_QyfiY^A{bJb>ThnIJ~kTMdKCJ}2#(OVvL zO{d$#xomGxt~>G@tp()`1QNDFX8tE;bx2~u&ZwRw92&U*T|DKl#b+PArdLWfL68Ut zP2I=)WpOz*ri0Q~+3FI6H+0_>?fr~<+YrhHw*)=aT)7PRR`!f|pT^Rk=aU;#agFwl zF1Kf>9U}R1n7c_V#geg7FCZ0c0;ZSGiQVx=Hi1>>MzLi{1whjR(}1o)qq-oK3|3I| zlU>INGLbJ|zMy`A_3kPyIoPFrAcz4CXGJ-jkaYH~0>Yk{&z_}%2B7m@OAx9UG0kYy zAVfj6f(S%ia`#@F7}!tGAPE9`@6UhiA;Sn324JB!&}BR zw$COme9AzOcN_#NLRS+5^c$*IAfk;0>3yNuF#~!JVwJK>lGu(Tj|S-%?qIz4$Hfm{U9VNt_M=&y+<)a@Tn5tk2MLk zDKjUjg>GNYR+XG?Ve~vqaQmmonAG*hVIDV}$)y-+0>lHyeFLW)z&MBTFD&E;o&0kG z){1;I+l0)~C)P=)KN4wU1j@STxCtmJb)vJhyb zE%cZTJi4b4S~?~v2-^*yYZT*y#$~`8f*IRzUNT}Zz~jn7i|YsmziS|irWUrLfKE6D zMn&rKUgUeSb|GD@Z0MEA)st=MVmXv>I?CAun7=uFfxF93>~O1EKWen%Pag;lY$~?e zcVC&^?~nSli3^zRYUc|ZX<5>ZYoD3mc7Ee{_N`Re>2MFTE9Btw4X5@CrsZbE2A<1k z(f(kGfi++kL^ddD%x!Hd96Bjxh>U_sonVfF8lBbHV-^?2f<9~a1mm9|B#%Wx1$PZB zR4{sMd90ifMc{yEU)*RF@}Js)=|g&vk1&!BGy=T(Pn&bkCqVSuo8qRDgZJYGoA0}I ztE11)-bI^n6upd~R?n4TPM7r=dU!w>)3?>r*D7SV60tE#F?8X;jhaF{DvAIqNji1B z_P&)jx1v@)sLENQHRxZWtC|Jh%@eXensEVZlCQ zZtk^o;OsdX&h)NohM5Dvuam}Bj01xPn_#*?&QLS`T6iwM9|%jGBvJ%IhK@m&b5}HN zD0G?AqoR^tJ780_3m!ctzR}dIpt$&?nK639fQUdZ z&ZlKTZse3EcigB;JBo_Tgz-h|0$om<2blT*8@!{#>3YQy9#6VF3!ca0JpMbhZjK^X z6&^_~+$WyZI-9D{{Lx!(kh8VdOyZ(d=Qi!#xU1HD3m3BQ&q6Ak42asNjSkuibiBcj 
zM(O2gvw_SoYRuT7RFRwO=}9gld8;~IH0ganVuBi#m5puOVY1pz+Gqc9TLYXMa;MCX z7vAqytJ8$U<}?R5t3urU7IQc_Cxr`(sAz)c47x`OwoIQ4KO%lx zm8j3x(sih9kV9rx;5Oe-7|>_HE`W&r_hb9>cWq(!U`Yq#AVAb_U47>ariSkB?q;AP zurMg=^JJ_9nwpCy6CuAO9D&RUJsXf_P-k=1Da-7okD*iA>`Syfmr2)x11>(@o3yC~ z1ejQ%Fa&X?*%6m}*{HpSyhHlrPFEZAI9^e7jV)J=2o7o`o1NRgy{K2r!2c`0bpvkI z|NUt2YW;zuLxEm|KVL~hgADp*J4S;#7fy74LaCPp3zhneH7SV2LLlj~9(|yjyCZmG zI(&F1$`T>HV?vQqJDmxS`juYfxAlzx$m*+1!zL{&6{S~!H!Gn5;_WjV&u7duNt@%$ zgbB8OvpSk;^qrrLdE06I@>{Ph%>7N^DrgxDKRxJZM+Phd)G;7Nj#DlF_4+DQVeqNm z)VuSels!0Zb3jg3W_Z_7g^LVmZIiq*M2_c@w+Q3bUS6jrGG3*O;xOqbWrt%_BNaID z?j(rT$IkvikuoYt-DYz+biIxR!UBSW+Ga~!eAJ%rUc;K1OgpeA+r$8>lA~L0yvBFX zQU0l$y62FX5=37ky?FrxTj;KXw^ibOm8(Fb)MrqU*)T(NkP@P;V_>YH|k4@G?gJH)AS_>#io+OOFSW`243d$Yfian(I0ZRt}Gq{TFh)D+z<#M z*HI`Y96=&v&@b-1NC;QY)&KLUsnmaS9=lw0`)3}!diq?8XCGC>2~GGrj#DXeh`RfyDXTp_MI%hlh$;% z9#f(#reMkU$oK<#4Mm%W;a?pceP#Yw_|OmdzC77gJBr0JQTq)?Pd+0stUmN8vc2ddfArWCO^2b);NSONXksSTL=m5ql(Fyje z2V5z`4C;1(Ay8}{3KahS-KV{?^BNGdNXyvt^Q$c!fLMnPqhvN^C{B#@z$tAwbA$Cn zCEXERvh25(DL?pd-ynq+y>4AlA#b? 
zNFK;!3IcvqWEyJnXqO6~_y(sW8|NfhDPc(Y1)|bn0`jcZXp3H#bG3Nj8ozSQ2Ml>G zB?AJ`mua|)XIM5vVvP!~z#$AV`|2RbvtDYCNJ8h$JYd?Pd!)=Su8Re`-0pl!pzQ2& zbJv|>88Cj(-z?X!!UrRQqu7|;*^@B*^Lph8xRQ(iYml;OItUV6uC&oQ_fH)%&HVAk zX&5%_^JO8~dZt0NEuCS$n0bh8O!TrNmb`ky5ez9)v+@eANvepCrSV3BSDSKvNX7*s z#v+>ALJs>chC90QR&4!r3e0};ya|O*Df&V<8Wu))qNQ%UFf1-^I5s6C2*mu++9vYPod$d=+Vd!rA)=gFQP{@ z>?s%-9e!0Z;X$m1jI3p>@{Yeok@} z#YU@Jew4k+vnWQl4^D{ z9nXzBs@aXp2^3PciM$&!05~0i@?>LC3WCKZrIJc&Ot-)IO!reI$9vL705=QJf7e96P@%!MpkF*4zd}3)WWrDGz@CGH}@d=X9QX zw12pCt)Gn32i}93Mps!@HDC7aC$?RAGkfM-M-P#}nob-69i(_*;Em#?8}lEaPG=Itq-esq)c)RyX}*C zpM%d7on#U_99=4M0uv4qaIIgj7E#Mw<(7xvYdAnNNBHEGommGXfr zD*GqlYb*da9+h3&Ct5vsxTL7h6JPfFFaL`dS@5uxre-5agoezjsh%Hy9KixONyj{2 zfbf&WiA$%*1VA6_A$a!aFIKAYt|1qy3lnj7%0rZyR9=2QJ+BYf0+=U$Z+w>rh?;@^ z03U4V?I6EZP1A)O7t~avo)S6&17F;O#&JZ>l?^2z6xeF5)YQX~+yO0h8pAQbtLGrp z)s9bmLuO?CXC~j9BpIjZSh^CYs1?thK@7zK$O|==U8|CGIIIZ)Fah};=7!B*CZs%XZ1LO?~kVnb{ilo+Wy@4 zT@n)%ispBXOiZ~LV2kOzIQ$fE2kKMyD(#z3SEqH$=TkRYk!*8 zVdxxTU(1*G4Ru&zJN#nDIOe} zqIE7G0>O<&0>XQ@&(jB0^42*dJDlr(#eVI6tsRkjvDUjQ6x^d771>J~BP+8*bh8l{ zxCyv?qVW_lb>Gf}YQ;6HWyut_mQO83WQW0aX876HNJZUvzr3q@&iGZ2VoK@t6w`ol zn}E#op==+qw|L3*OiLYKCTds+XLs8uL~t%u_YbLPjn#!1lY~vwq(2vw@oL-K(Jwll z8<=aeh6mcY2Y#-3M{*vGz8^asUbM6HO=m_}ntG%>>+q3;h`6gBKMqN)4$Nn17vkH9 zn)FV|@+QCYJJh!M@vDMix6nk_faM86;jf}!~GU^QCr}1)l@v`2xsvR}j3E8k-yEv`1&7-JYx$7fJ zp|{H+u;RdN$SvcqQ(Q-d-zduD)fl<_(%_YCE|&^51shAT#{H6$Ot)UAPoRjgCv63k zESHX-(($w}V@k&NfM`!2=Z&wchM+#YXtgc)Bo7^ylRny@_M!mmGTVEb^d70(656rJ zzf?MvKL^V*ms(7^6eoD?u4vEx;S|Vf2LRkS7zYmn3ZRf!5L1SEsVGkem4h%azz0;( zAOyNese9m^)bbwQ1!7cOE~82@Ps~U2)9F*W?8L_nAH5n~`|(}h{DO*l^4QL00+Z#7 zIsjH4!qDJy2$UINEXJ^plFMSl6%A_vj;)2JGR3Fa&6;=Uyg7z%d4Hj+T$`xg4&M9p zVVftoyJp$wHe;{+oiGVCvMnCPXMufA8SDWVM)ua){#W%HE6^`O)at>#7smAt0*Mxgy~J`fi#A~F z4jLQbY^Hs%S*wyE0%YiJnNIKMs&XDXdFIy-;E6pSe&VovR!zC1x?2>U8|E}$<()y&>0FQp06`x#Lhai)e`SC%zN^_Mn zU52m)vtA49-l1f1;t`wIb7?MBRjL1}e1diFQkK*uvCxU?+~>ClyiAH{jLC6>vLUe^ zgVx2v9)V^H&b30`h~{#3C&4@eQdXZ*#|L`r4Vw8w0m 
z9KkY(W3PYf$kex>y39~@XO)tTrNprAQ$RZZj6MV5u5YE>M91zl>f#SduIx!U%9f3A z$b{ntwL*|43;xAyU!PAOB&;dOFMOcse)W$`TNK)Jf1p1sgnoJ*c?(+X;6;%)h!y5l!iBv|*vE$=cFoqsy@^}Tj8$Nl)5FXZbu*Q@)l z9Nn^&z0Ciudb>KVFk^T!tA06pR-^Wgy7^hE!HA64ZgcEbvi8nr`zhGW4Ek@~F(I&# z*6)x701mT!ET<))oduFv_bNb)GAO2n8?Cw0`Jxa6QH-FQ<*eut&@T&>@{>q9#F&RO zH+ke)h1eD=SpKjvzh?`*xLx0XP6kTL-*Ca8y_n-W3?y#dL6d2qE85#cSS>7mc_>gI z9d`!AS^#@?ME?#W^gQ){bgZZH3NL8N2BtUhz8b2*1>GnHBP%Pb{Uiy5iy@U?sD~8@ z_(kQd&vJ!ha|L5(ZE$}U&E|kh#Dy??W>|4}#%S0ujK`b-wm-+?6|<1tQ35TjM(eO`f$ z`ieds-?K1LB8Fhq?7|TI@IoH=vzoJ&t3?@=2C?%vLgnIQnd4=dp*FRMYdI~w!<2we zJ=l8O#iKcVS@6b4Z9chb_zI#i4@mR+CK%f$HOIzvpDIHqR7?hR` zQy;-X4ar1BKioX8@~nNy{6npYeg*Sl#kCH6G;n^e0;-x>aO4E^SfC;InpdtMTjJ9j zLx%wu21?_=8@DC%|AU+`Ursm>3~-b3NTS%BXLk~4<~vF)udEc024bqTj#g=fd*S~w z=&uR(gvhY0N*bv57E=g5P4-V>XA{#amb)@m$u)FvY3IDMk;xkLx3G9S>ZxD!qAAU1 zvwGWR$wGr*X85z~6dZ(ETH=UH zs?OmSi9ArfM2?lF9qZvsqm#ETTi8k>>%<#4FNJr{c2+y6ZSrN{J!{&yX zol_5EWUi$j&G^*Yj=bI4BV~M1!f8x+uT-?*udvHWezdwJ)`|fgj`GI`AGBt(RcG?2 zCp!$vyIi?^=54lelVWW9gBmIRAPkn2CVwdJLS#*r?%Ss$KAS0}^@Te6RW13jSKqq5 z8M+P+jYoYlx=YTx{uvhp&>54n%?DD12-WdKlA0;rVt@5~`;xqq{N?(feCE!P)zuw| zG)0m`h1osR&q2IC%n2B-6NeUs2Q%~aARUO{yC#l(1ay(bpuQ)_jR+W)a4<-Jk2qaG`|7gf4$k=}MQe|(E-MiFuN=(tJ9}8F zrxfC8dt)r;wCY;SB;H!f#*gEh2yA>f${L6xCB(nq-Ei6D>d$RJK)xhpuTJ((%4fx3 z!7bI_B|7rIVfRN>vFbjPW3Jwg3-<8qxx%I=LeBZc7~L(xhwiHg-A+n@&M8dg@}LhHE@N`F>BsEPH)pnc1nqKnd@f zSG3j1?ynWkEKMYBi(aRXc+|;N$8K5K9AQ^UJbr!J9#_#aa5&4kFk@$8Z(9UM=~(nt zmAYuYyna@i$_@9K8#_Dp8bzz58iE=W@is47;sZVChC1dVn2*Wa5}7wt(M3-;ZKsw_evw^`BX4!{&Z{(Njfa0ZWBPu*)D@A#-X; zA5Y8RyHzUHF2&D{c+bT3m@B0lTHb#1v^@JCJj#6gtjPrJorMr7M%BV4qVtc2R2#%r z_-L5m+nHy#PJ)XVG!07vFad}Ja->F#7!ICoi|@Z$Y=-?Q(AFUa6^Jrw!v2Jda4I6b z+3xFkSx5l_4CY+QJhhL;3y_MMLpTQYUheB2Tu$r-eVimRN2g9};kTstRrcb3rS;21 zEtk7C3W*O7`m9FkVpUqp=8H0>E^3Nl*S%Va@`b!2ZAjWx5y(ZWN15P#{Ph>EV&$*0 zL-7Vb2u5oKTYHj1V%G(KycDy5mTT>zzW8zrx0gQ-;nXh5Oq%MMvKK7D?|R3pQI{)TIL;+^inBgPP@4p3)+z&1aI!y= zcK)$j@QU?)@Wt~Gjmb*$mHWjrY%-KWk?dW@ga-l&9ZDVf-zV6?pbeySKrt!E)d(pR 
zq~zf^Fpk4L#eILnOhH+>zDN}DPOrTZ$6EXE>E zjzwGUlnU9Sjh2%w?y{VmKOOI!f13HU&?2e!-QH;3`OFJfRw{zSAH-Smaa4(0XBrYn zr;6m(cw7>V&+mdcC?2_q{4_^&evEFp*uqK}F+O0Ofy-$D`2upMVw&N;e1&w455eLI0~g8Q zgmb%+c<``Yulyr*H>BEs2+%rOjzGScLze@$Gw0KZ$<-!hS9x$Se~fS^?C54aaFBOm z5qe#InRAyTQXQwD(`=tuP! zh=&2lF$2vkxT=-GzmLXqgo<)b0wli{(8|S04>hYGn1IZy`+3_yo$0;n(j=W7;TwE=Z|UC_-l};!4Cz40Vg^ zSaljdamb>c%Q>SIyaWRA@D;^lQ2x$d(9jGwmlfi@ro|klV?+|5#2rMIMd<%1s`-S5 zy?Jn$@0X>Uq66xS(W5hi)^qehO2)gURJ`Z(9LWSIOb%TV^VKhpzM4kQeKO@L4J5V2 z7sFrh0y%F@vFgX!b8!&gxgRN?*H=wc*Z zBW7lwVA#X)AI3iI*0D-@EI+9Hf>GI8rdAc?TQCyiwdE3ppG$X>obnMKxYGmHz=g+n z`y5}stFuMZi&yu}p=3(nT;$RG#{B`mpL2JGVT#!Xpsxp#?}grEnCj&Z!T^%>hHYXN zmYcu)JRk#Y4$%BA$mNO2&1D5cIf_3+NstL zwuP3f4M6W`&g0x`-svMmH0_Vs6K$+~u+FBvbf-Rs^KNkT_OsLXfMCo#_b@NfOO|_A zE>~zHX}iEkFUnrDt0YSZfr(LpK6D(<-U;v>%PlM?RETyUg|)~tS8j%zGdr0)9c<5xb|=ifCrj-2>!rw@O7-KXlFl9HSi5}$L+-qb zL%$G6!HjQmhr>sWj@zF5I;LB2l}Uy}cX=q4sg~rvVo}9s8)owD3wxU{m$olwzQrzT zwOA*nVH#r5yWJ>m9XE9ZONeM#(+#(r7>H6v68x@8h;5eYwPuvguxuZGW8ri59G$V# zVdoEfjSvv94^-<0i~&VHr^LD><8o!dgV0*N6C8{tBE!_7F6jF!K>CAJ2rx>A=kqQI zh6Ev)mx7Wq3yvIAaSEM2eG4!+gz&>%^ry;zUo@&V?)@eW+kljYI!vZuXxfWq!_@{FKigSNfBvjL{dcrR7y%D z1!<7(P&%bWq&ox^P>@uR?rsDrrKC%`1!)n9Z*F*g@AsZ_{y7eYF;vF5_u6Z%Ij?zz zZf0(w%VTFgVM}fTtziLQfi(h4BKhWE|u3`h=?6WQw5jb zr@cngvI`;cd6sO&pu~>8AY3U=UrQ3km zKO8)Y=D_s;=8{D@9I|EJqm0I8FFNjQLO|LwNtTPaBpARFCNJV>VA6NIQ=74pU_md; zU%1uINkRWJD$vbSfMnM~y7w~^{6RuoJ)rzGwl~02kt-zja&fR1e7x@joR-}&-zn&J&hpvH`2@`;5S>OvUk2C2A@Cz zYmd+l-pj+48>pwhQAk=PvPlO2XQ%S*sf7>tZ&D*n!v83{Ak(FbjxIZ;k*)wQP$Xp( z+HDC-pgKDRaqoyiMh@h8%>Vu5i)kYE-1z5U8CiWRAW>0|^6l0pw8mRzD*G&3$;7c| z_bh_%ePYs24*YiL)UwteZIY#FW_C*gitQ-#?_w|u4KlF|Awll&TYl?CO5@~Pcoxb} zL(+m_XI&Zm3WeoWs*EF*C4JscOy5)u32*#l(WUaXf7)o1$Bu*k*0&aBCiI_GRQ7A* z!T%ommR|j}ZJpRAc~5&GHNwO`+)KGpBEADTGr<-v1SB2n0bR0}rc4u@4|ty@9GB-I z1(TyqjT;?iM=8=S-`ELMohD!C_P1Kl9G68o7%^ti}5DAWnYZK!P~g0qC6~ zGk7kgoDV@_Z4pGAfq}sdG#`E7k{3icbas#_5AJ{l264M-15QSkDsBp&^cJ8Par_!Y z{xYu57ce{LFBA0WCTw{NhSTGZw|I(lU<+4MPYt_?;iB(umsC 
z)M$-5k~U;YL!T*ptW959$+ZkiB$xKJW6(CQ*RnC|OAeo!TUkpHQB^_REprF@Qa(KD zj7>9m+0L{a*&Hj4(bn%Nuc3^=v zeu=rHhAa~|VZWf$g|0gbOl`S-nli&P-YVseJ+(JS)^Y_iBS=XAn}lah%)BF-q8p|# z_HZ+1%a0Y++iT#1h0%PD77o(|Ih$>#D$Cq0v!WMI_7B9TiG7}%l7f2w{yngRflBHI z^gGtFq5oeqbSr?7!E|-`XPqE)O^A>mZ0(wiix5Ev!qq!EKE9;}wsOc;kS_;p2&jfL zux_A=n?EHyv#w9e{(r?cSi(R>?lIUIH;7r)@>geE z&T%DC*Ul6vqH3*r)Iw0+>Oc2p!d~}W69~I1g*!{kJj^J)@z|`|uN+U==)}_q_=cR} zv1_S4;E~h_9`ujb?F#;}4+?K>ZKYp*^KBv1pg|Z(h@s*#O#z59MtcqHDp5+S8zlev zxvv2i426n#=m$hrV>np|+6+9_d2+SOR?Dkj|9>kE?s*3d4IwgbqEqc5evOR0vIadnfRj@WnID5rBq3k53~bdV0Kq#`dw>{R z)aHzPvho}R^s#%g_Zevd-liI+Wv9Gmxl=Kti9kbi{Vq&!+pJE&1P&%kEpUXz;CjSL zUYAxjpz=TLq~G0j7Ro4Dj!sdOs-0=QV=h2~x6&KO_12XOMv_>bxE#xmWv=YRxie`~ zsTg}49CkLUG#jaU_;()LQtjhKS@b@<>?c_Fu}_*z`{@G;RRQQ-ivw;i{(cnX#{R&( zI9St@l~B>u9*?jS>!uiKJ&y8$K-FtUb!GnsxyPoiy~UXAAMD7I;STFr@dCDAwI6nS z>9APJSJ7T5%OrA$^YH4Xo;2<$E5g&E1m|&OEXTAxhd@~U5h~UdV-Ir*Q*lIV($*`Yoff=&u z`svX*_3v2_==K8GfxIIF%J!d;VNKHWOmVwZj&n$PG>8BMT~yAGDP zV)1+$J^f()Jzq)s5~{dM;7P*mnc$7r&6T7i5-b=*dAOqA@o7a(R2gvIT8x&BiO;`t z=F68AF)6s?e^bV`gob_?slvWtw?zsX;d+(sM?R#H$73X$@U6Zcyc_dT>g~M~ncmSh zg-8DpT}AVPRhpjX2N-|sd+`w=E$>{|Q{g%9=uwrfXJU=`!r zdF!^|=S8R6X=5KVj*jc!vkWB>8Y*=uKR&Hrk{;=c)h&2<;;Cp-@$6Y$O3JoZZ)vug zhO6QkCme~W!=IzTJSk_J#Jj9*SN!@(#?b zeuQ=L`C++;;uQJ54XM@qX?R13L=?U1OpsClUG)LCGhappwZqpaDd7n%dGho!%31mcP!dg8n&AYW+Mv$tUJ*^E6#*fpf9ieXXHmSyhq5y^Z zAKtU0)3tK z6lf)8JP=TC$4cO5`fewMNXDz?gAwbg`W8xwh1y{>D)L((_!**3c@>t@6SnP3r%9lG z#(8Z;%+fzJVz~T2gm;ND`=y7}8I3A(%jNM?RSk1)YLtkPQELC3fSyCJZcd#uYSe`t zPPCw_&mp1{@drfK49c-v+UNeL$`Sj$;)FBc53@Yg@Bn%be+YC2wlVr?&ri=FDYXn6 zR7211Yq{qP7auLx>9C-Ol98&ml;sz)xKL1N4C{SktXwsda?qG0D=e-~QNdJw%P6Bv zD)###GXpb&vnl8C3omA_j;KeR@8x=qYw~p)Jv#sS^Djk3c_e8tZcT{L3knLhlbw}* zl5yQ6`%cEo{SRIcys!n2RFr)a7KKQuB<1YbnXK*Yg4zuD#4!bFa6yIBlc_@15Ej3R$uf)E+wq zSbgU}_tG)&#kyQGT`SikYa_*3v)uTK=fc+0zb^AM-6;yCjOD5kY)?t@W$TGd#3tkg zRe!k%0+snfLRTQ%TwRY8sl?R*<;;oZ(M=pv%U_wp%&L zpY-NU|6)a+T*1uN2GQD+h=hAK){H0P|;^N6pDE_L|9!wEm(uoQG5NMmi4{P}xl>tM-{PKilRVyjb5on@Idh{T;@vl 
ztyWhxkEm4Xmb@$z2c6Xsg&k9GBp&Brj_x!VN_2G8{4@TIhSQg|hpRc@xlEg1x34{7 z%^Pfd?241@xlaWa#gRyaviJ@GmCDr3KVbR42TLjJwmHq)v!I(_SX}&<<`OC;E$xKI z)dl_BHZznLC@ifoYBVYtMdHgqk@gZ{qe)1Rq7r$2t&nE-E|vPuy%h91TgACZL3$i? zh}4=mk&Esyrr0{DxR&?ZB#Tg=g*1XT*P4L%25Vm72#t8kvC3PhH5$JgU5qNJxj`Hd ztQ%D=V2_S5Q9d}bs?=M^{6bV*s@FGABvWg5U2o-jUud)QXNSrSrxKe-#hMzBaXCtf z-KBb6ltbDx{x-Fiq#yxJSkZVCcrl_cuU{{Sd@M^!nX<60+y^et6TJ@z>}&a&XjVL}_O zx%rr|`_X^UsuE!e32|P0UP60|kY~uQH1u6v2IX)4gn*5UGgK3Mu^mQDQwlG_e`oKP zR@oJ$qu-{;Z?rp{bITvMVRvziAP1@ASDT7SY-Lhl+#(zySU2M)U##WPk{e0xi&UWY znE(#zGNe}_`X8w9D3CC8#IX{*CI1Qi0rEET-w>D-Za7K-r_AwvVD_V-0C$*JtU_|B z0jdLw{b^86FZZ_{>m_xkPb4;hVJ3q1oA3N-IHf&T3msL_oizmBPH;UwW)Vy7`?A!y z1b*Av$6S&bp6TpCRv#waFjHUB5sM_M`HNfsnxpy{m6D?nsl`N+`qnwp_Mrr+yAvl5 zcE__j(rfj#IpR(!Eh+&oN*pZ?4vEZts=m*0&i)jz%K570$?mc`ImOQChpE*$-+CvL z6|1WA(BSgZyNMeQRK4=<2fmWI=TTOmkljRY!yDK#pB7hr=?Px)&ps{_HV$fBLY5x*ct`ZGGqJ5#fkK+}SHvG7>f`8jkU?{m>eYw!?C0 zZlETfujTQ<(0vPpMNTZ6SxPLMVBauHcX|M{^l7vgV6@+BxrQ-_NSC&^w-E>m?9abA ze+RQ*Bz{cS>v#`A#r5Vm-{Uzhes2Op5eRQ{Ljmco%1R4{VZ$cpwlH8N{i^3b%p;%# z9GwioY?1a*I0C`ZgSE-Q@9oFzmlU?TtC{?10X;s7q57V$W1`&JDS2EDR2wYWDe$sgOt!Pd^IJ@i zF|vQ>8Q+5~bzJp5U1U$V!lp>Y*^@~1{a9}nHx_?3>62TJQ{GCL8fLR&l#UY7mr~lw z#N>vofu9x~Tomqq_!Z@-cLpj7bJ$}X%X8~ZU@#pj4Wj%qU|z+yk!Y#tl@!*EDWoY*i`Lob8gEsgNzZ=W@S&Li5sL0{cqqIzilS(cZ(1PQOKzW^v`R zefAi7iJnt{4Uje^h@vjv4pQ%P$Do!Vts5Z0IyC3(lbSpE8U@jGRyj7yIAgt31G){W zFQTi^UHD$a7YykK-4A>-V#2q=%ON5{A&Jg;ZL9tDjkIK@HZpAaH*G;SIK^KOLn@Se z;0xWDJ9u?GR9*Wa(b>I^6C;_NBhWtJNixDFH4HAN;Uv9i^zSb*R;X?iqZ)CLs-$j- z4cyq%cZP!=DHGc1wI?UM(Xa798yp5L&( z2R_}4O*BNe0Pa2@yvRI+YKFRWJTaxRy^KF2y z#NSCeOtMu4^~tb|-zi<`wOE;7LlpEqO5Sf4Ifvb<8Go8gUq%JCI{E2285Z(h@H2T9 z{iAbxV1Pd)4d;#guK`+-M5v7ZS6mM8{`sBL?W>;l%o&#_Y>4-jgQZQ@fuFPFh_++q zsGHaW{Mm3nWxer(BYth8CO<3OtsW~*;vAe)1)#GVi!WRy)nOHIN@A47dYLQi?XZP$ zq^jjsG-?ezSR39AU2VNA)#pMAsY;DAo9hw8ODZ{o??B-L+?nCUdc;-|v_v`+kj}a_ zV)6=6JnzWboY#xdv!F2filk^h?Ib-~GdLA{b@jfO-dSm{tQrCg8_7q+oW(VgPRm@{ z{-=ssH?4zFp}PbFqmyk5Dro!;&#vSZ`hVhJOtZfh*WN216KI=VYC4{66BxMp*@JTT 
zQx%?i-^Pr!4HrKfpQ?txxU*zed3w7uzEN-MwOL2Ctac^l!@Ri>ouYIgs! zl&O16+*UO3rg23~si?L|oh>&yFNLd5kN{tVET zPN@6g=3IsHL3I?5h@pC4@ihs1t+3e$7EzhwVh2?DYd=BOWNntmN1YcSe% z|GQk!)hM53B5=~--t?DWV-zp52xg1-(#D21`!@ek*Yw$V#0+}N)YzdNJF?%NA}{kf z7h5xi5)A7);)C-LU>SnD>*Ev%U;F!ae~BMQr-WFdjY#?6%X-GyVbp-b#h~hs_R(62 zv#1uLV;usDmp(BX=^E{wF^LNM069kbd-`Zp3D(5KWwIaS!Wiyqr(<3~r|Zm4MUnPuq$)Fls^!aoRlpp1PgAmg^KCj^0i?N?}LxXV!q)RiJ6j7|z>g z^POs6RT_&=q9}kVBslXv%m%bNnV;NzW3R0Hpu*;1&FCPplb58hg4Qi1U+Vb;8RezT zyl=%&Z&t40MJ9A}kCWdXWYmc9wO%6G!lxChVSS@)oYk_pdOTvqchRr;*Pt$S2#H_m zDCLzmVRDjbihC+c%%O}fF#}a!l2C=ShU|x*Gb>9yyxTEvs*pKItF@F>{g!k%RP81J_BzZrMUAfKYd)65iWi%3K4ts z_jsP=94S7EP|$MXP?EjnMAI{2W*HEqM~{-hoNxJ-`gV)s=S~P2);+ue%@oXP{ zw_~HgMGmar-)iU3#0J~xUafCb4TQt*%I5miENcx~4IcI{&iR@MS$cY{tCtd%y|vTe z%l~1v_ro-Qu_!e86N}(Di@VYZehgz*c9lMhUzheg-p#uFxRbfE-rx%PvGxpg4$pYe zzBGHrzMWu{w2!mJ5&iPI=>Rt-Z;!T>!ZuE5^L zWSOfJo%-O#LX5^H)QOnSaB&%9j+hFR?!gOS;c1R8;_^;T6hwmu4c7&JqN<;S z7T#;JBI(y(85J@_KgeMus$%3xgxuJ9AR03{GQ-ambCLG8kR1ON{o0fBL;U>b0i#r0 zk0ikh;>oFV6^%=fknD{>;&5Z<7eQ+RQtqnt3Y&3W#F*N3!Xc<3BZFQ=25VPNMZyCE z9LqCP-?sNbp^>z@YdX$q^L##m8Y2ZOgYybkOiHgVTF|{2cEVrwDV?SwFjqLY?SS$-AdVor>h+&}*m&t5WmqdhG> z-k-m7G1K6sK8N(=%k|G4UqozpXP;=dJmSeGGJ`Y6Rly4m|9UMc<+@4L1~D{L7QK&5 znzzhbrbqcZveP?bpLPUa0JY-mhZpPZ+fpFD8u<`(Ub9nh7%AQe6c%r`^XWk<7RZIQ z^-)v=g2sK&-30se;I#5#5i2`MM}4K&&yma)B7pLMyboLOO1pAK<9uOSx6#AbxI_SQy0p$la~Zd^1e`6k(Cc$b)eC};e2xIO>~Zo&4ILNBr91}r|R;P)yS9eq>``8 zICY1Zu-78F^@IMZk^f@JZVu3jCsHS;?4CAN6F1zM@HX8MG1KeyO`v6HrD*)QE-0<2 zi=En`5nKG&BsxW%f$8HF2NirLPW)LK^aO$T*a9KctRYuif(t`<-_fcMjLByft|;5) ziKcg$a&Asvith^?pz$V>5q@+2C;q?r9U=~B)CasrDkbM4d zg4>g;o!0`9umrCa@&Q6CN}nw&Am6ZBl;xsKn-C@jKXmovMJp>8i1E#ypC03^`D+5UZd(Qc(?h z_OXknd{qe_J0m+q&EM>^X3(-oL>)m{q`(JW$_W9Rp^@+|^u&m7S;JN6;n4`uZHlNj z{EY?C?ZN7ozlSVJ;;;o(<_`>my4DZknU>~4Y*#3gY5F&+Y|?0BcriNjc-qyUpD?JE zP2$vyoDj5<;LzgsnX+CU?Ra;7m3`@kX!hVZ{pS}8>h`Jgt5;bko_%8n&r_iv{Rsu+ z^G=7m7S`bD4et9F>;Ep&jI6uJB@5pVRy*rQoP}qo5d@X1()a3t(90t}Cjq^}&j13g 
z4p^trO0CI6nYBwL)u+@qCsJHV3+9Toe8PGe4OOJVgKb`8>|MUb|Bv6kReN2Vx?Y$_ zn-2TFP1ZJlsk%k~N0ov$yTi-2bDYvqS#sG1)cP@@x=&3gxs}PA@5M>ntx|0qI1#A* zCkybUp~G}eGta?fE~$b&w=r_|H^8Gb&Hh$Lo0FKzmGb+HS4Kuu0-#_cv>UVfFX)lA*dm zr0(CQtPpvLbPeXhkoMbgOC=+lpR@y)VY^C@8j1gFPJ@z0PSUvM)%o4Wh+7J+-i)Hx z;bp|7C0D*9_2Uy{Vuspy= zwtZSA`NsJR`dQqk$9-q9>ewA%oUYan*1R-Gr~+ik&q3SryB(t2BnAk^tq4rq|MCv- zv+1wXF{v4)@X{cS^v7;*-5>jLmq6MYNKvo*Wka&@~<*#?aD(z57sbJ_h`jR)H;+NkqUQFNW1j;&{+}8e%oGe zzxP2j(Ol9R%+@-Yb>m(zv@RbOb4~Lfrf1&alMkrniWu%G7h`(pGDt6&zdk&|A)sVc zU`5i){h{BXc+sC{oI7`?}qGaMS zJy***{O&8qY;=L*T=MV}W0CC6f}N~uxZ%O;eb%)WuDyq^v{f~&99p#_{G9i0P69UW zOP}(?bn|&VvjyH2Uz|JskYR)TFtP+g8YHbAJj1r&xHQ^Z9YhQ{!S^(N{nncfdzkRe zAP_cqDMZH%R=vzTlb4ZNN1qK0xcCG}4?ckZ^jYo^Snh&Sz(gg#4+lq%H4T?U=xca%JU7Ja2Q-7AJe4rQ=<`>lBk}7BBHBvzc*shn}QVTyM6~ zC6!bi+1;BI(W4bzYfcGREGhig`-l()%kqmSsYQS8QH28zbbl`~_h%S2SZwo2 z`(DW|{r>~{O&O-_&R?Sl!dNgJ7_}}8bs3QmBtLqNPC=NRM`$%hQ)8{pgDH^6&@GK; z%ZmA4=d-+C^RpEPd9e>k=!vYNSBGfrpgs5%{is*xil#$hE067eNTyte#4CxXjcevv zz(O|qYTBHgmI9vwsz?1swEP$2hS4&8_)Y;e?1xh*@@MDFB--GaO7;l8tB%# z-Ss|fM3%{+Pm*G$Tk!X8Wr0|#2zViZug=Bv;<*QW}wz0Exjsl=5}`O1nf4 zcOO1|dJS=0L8SZFu3bYic_6m;6*xjN3Jd?7yX#Uzix42eB*Gu4yGT-FobI!W2;jQA zHrz9;$UaXHlW;#$@YEKoX~%_;5jVmz0SzWOc&}Cn@=D$B)#|TuD9Ms`i6yUuy5S8i z8VB=U(2MC1B@6&Zab|b6*YibP0viBjg?=}qX->)DoKIMb3o9sG@r+kQ+WKpu>@g#u z;6>GXu?yuOAcs?!IrXRd@Lnn!?B_8|U z+|w~f^mz6Q0?qd;M~MOZEHHVTQZceMxz)Py9>G2jvnfyOc`v;eaL2inOBkM$jtfr0 z@1cQVAyJ0&xX?fwxH$^A;RR7nDSja-*XbFp9U=*0ICeCKC^~R zQ@91O0Sb7Y*y=&($zXKrdVGQ~M*%3EY(=+#Ll8D-fI+t!pk>M*AI?A)62euP_5s!A9GO!N{+duv5)QQ z8HVO^9N1QlQ5nI6&jx?Kxul};r^|70BpiQIgvK`oKehMG;xkQ&1jU{A61NBj$>x-r ze{{)T`(~WLMrdI0IG+=XsuP}#Ma@!i7{SpVtlZDALbJzHYU^j%e_XX9;rO+m3E1~? 
zCeh;qTO$+jeY)eX|AWTsMZ%pK%$6}WoLI`?sCjF;5fd4d)|#G;pv=wo)i}^YS*$+6 zr{ztDqcdWBo#*;8v*nWvmy70l(K28{V<#qn7i(;N%ewYzXSj>drW)}H*R@L>V+5XS zvl`G=C|?beaA#|Hh+SyWe?6R5keuX^oA7^K?spu)C}brgdZhMBaQ-eAp2C;AHES8xk06 z)_nzR$y(My@=iX3tP_$%35LG(5GsW#1gSZ~-m92&p@D(kw17tCV`;2fd-vx#|32LJ zAV)#S=)huCaBw@sB}qi5qu1>H9kFSb@;Sjn{qgRz6|Zk=1pc4gmB@}qOAMmqa}FNN zeI4ACY~Pv;Z6e1j#hs>0)3c4-4^)P?H~(gEekfA^D`ZfWN4{{VeU$cKkr(K~%h__z z*0AHpdl^d08kQ7gyv$uk`d4bQ?`vuwV%RFu{O;fSSsdarFtVyEn*x|i{#5e#(~^vi zeeIf?Db74h!S(oXWP+UPFb$_BKLawR-*R(hs;VZU?{%a=Y_ZYX6Fu-v38li$q9l;pfZZ$v(tX77~Of+i?vC3qruXYXx_LR2V7%eY>AM zkamWk_5?1Y-@$Vg^ zcZ8#LW642P4it&$ zPM>`ooh97@Q2fE+P4rB{@Sm>*hH^NPp|L2NoW`{n8i~lX-2Uf?k;rh-*2O!M|B{sT z-FaWk>QXW0%=F$zHTN*N%OYs+-NzL0ylAUpn6*Gs%Ug75Lr=?}i7Y}-`<0sJ=H{4j zt+V`f?q@IAscpg3`xPP{g{ZnFK=0At%LRfFw+W=mz7uligy;@&z5gs+5iFNjSu?Es z>`|XAIZiu*VnbMw6$c$b(~@8P`Ru$l)HGvn2zQNblRV-Sg?ta>4nb_cp+7@;g9|p2 z4W^VcSP3)NghWSh*F!SAfHp*S7eVQ83Dx@|nOcp1@y;{#D52`&ngnAce0<{bGaWUB zu!RK$BUD4`O=1Gt$xrob0f5>f6HW>^(?lvBl?Hu~_92E_j z7VDfm_y0SmEG29(_@!)BC=A> zrw6*|ClFYQu|MA{MHgwUwa``3@a?Zy>Wsg7w7Oz2o%Q!UbIOC z-bivoR}|X9l#!J!vK(YZ0u>P{)x05%E%o$Y*4hzQtfIJID9^4y+@bS zy|OZql%Ew@pVV#DsdN6jlgbSE>%+PD?cL|R&Kp)V?PmTJ2jQ8+U$|xyeynRwJ*oN~ zdy?v+nX3d7`1<^NaEbKlyg_?UpXYVoM!!Der!bTD<(VEFG?DBSF-vR34|L-t4t~vO zVI9F@j{LZCPZ4n0rwZ=|^p|&HK0-?)_)7k|kNB#--*?lXmJQF5Z)rkHgXEGdY)Q#? zWpRhNX-Z_T6(;uw=vX?6SpM0r#x_)zPN+-5E%`I-C(hb}=xox0@`E3-%l zUlz7N*4Us0mwJYa^YlQ+Kz?L`Oia0F56%Tt(1_b)&_Av(<7T$Zp0s_`bjiezNT&0? zkvRO;_{ze2ub%t?#o$awLD=TR*GEy0YLAlAm<*)&a--g_B}-k1kjyt~Vh}UU^I~ra z+`(Du?h_#pQF4EGOUXbg*m$R5hyIbg{8uFQ57HAZgK-fO9EZ5xBR(upMPEUtGM^v> zFy1P^>Fv$O_Ml@B&r=Wjq<)Tvoc)|;PXwkjfPx&i!6X_il#jHe+CK=nlc1tv7XAh} zM^ccIIxW8&f`q=txdS8|kDZ6C^>gz6su0TXXpS!7_>N|sAsXgwap`g&45Rnl9X4{+ zW`u98hQ)pm5#4!?OUSK!&r_hqu{u3Fmd)o==|B$kFTI*CFTSzcn!J89c~nb1f4OSw}SU&fXr)ILOeQ>hHX75JewyVbwdk zS_b9g{;>4=STgYn>Ia&x+r|fio0eCcGaV?EZ3>!W!#qr^ZnKBx#T2FqH}bfAySU4* zx~PUvo=zHOsyoe_Q8;qfv7-n1EDho9Uv4isJJZmV=in@@v+W*U2{ZjL#pWhpvSQn! 
zjm3gR_0|^;S9IgBP9;j5!Q!aSb6RBNhWx`^RjdFducUn;reCP;hbvrZ<%L8JVH6dA zo!>r2Uf%dfA*NYWZPF-)>k5Z>L^@bm94&c{>~;u*|6<%(jd()q{%}O{LU%uDlUu)L zg@c|JPpfA+7HZlYM@{w<`)!^EE8pOf5Hr0$SVWZtOf5fDaF!;-Q&Ut~9bf{8yrXES zUlUcAZ>i@t0=uoqypNGhzm`%aodnR8n~1~%!b7ftE{!2S{=Bq0`TYk0%8vfhA~jw)Ddm#+Ii$CB^{mdRA?ZBA=mk<{yux9ZL{ zHv+|iJoq*z?=p;yYYn=2c+1`K7Z%q4c}UCco~~JxI3LCTSgCywB!jhH)s`Sf>vg}b zHseEq&7CA-o1q^aNyN`&mE8O9(L_=;lUao}rx8X>0J{9_pa*B_m(45^!DEZ3e%2gKG83gCBKCFAMA@%!pi zKrn2mlS)jOZV^p6YvvD!R*cNT-mr7@fwO-b_0eWH<=`#a0*kdf9`bj;@3$%>U%2(F zH=C3)y6^DyH0{fWEv*c~*Csu`6Nu>0*}DAH$*U_b3h8L9n!JUv>ntHF`z`2AZZ6w| z*XeO3*moP^4i{!4+!pA{e&{Dlu#98bUUIx;C)N4B8zuwD>&P#OQ-UO;(6 zQ~(AdNI@EWaDuEIy*oHK2qHQuV`Ikdr2A{*H~d@EfBq>mSy5n#X{^$Vq>gp6jbI5a z3@$$u3DVuD@3Xy-t3JH+8hVh`DpDDIe@8zCQ?BWCE2d}^Zot2H&1jlqeN^%sc2^aO zG*^4(@I^CEYpi8R*B%hsbv)Ullh?}I^Gg1?UHU7BqTqAb=iz=+EAnEN+qW-xFE-t6 z+~FDQ8!nlN-;Nhk_DcG%qF7yCTX|*G*}OKYS~5J;X67$@s|TmR{rUZm%T$mZsSy9I zXxN^_0}qSOByZm>^y*zLw>xIV{|uXXu7w&W(5`iU`v$oye?4iu%_MT5Bw1SEnk|~6 zGZc?my7r+*Plh%PC*bQ2*CKEb-HQIvZukicF~x_xHft$ZaCz}Eg&JG-m?tVDdb($+ zP4xO!@#QC|B;tjqPrXNf{hiWQWL#bgR{y(2wI%L@_Uff0by^p}`?{jQQKmPWH9lMH z$xMws!Rif>&v0jli7^5RFJPvAy%(eGX75=*alN2lT? 
zCCE_&rqw%PPXWZv6LkPB=~kZ<-3+`E5~) z9BPUZ*@JG$;PZaYuV2<@)aSPK?yU^jk$=zRg4_;`>XKho@Z}!tY`xN%`aBafhFg+0 zc&xu6Oxx)gmn-xqXny@#wA0b=nF$ePkBee$R#D#)F5DQ)`N-pj8^zbNvR);4_*^Fa zr)!wiBPjz#FWtwqp7)O(f1L7f&tTibub;YcLpxz zxFw;WyxLDgi8UR{F>N9`AnQVqf@0NiqgL@AU_0|A00Ed0f;-}SXJKIx^`XQPxR&>OOUx9(pg#7Tk&)&SI$wCh8|VTE?@sbt zM;$1E3E*pYV{&}G=jTiRu$=sATySD|aI}d5oz-Od`z}+^2;qVcDL8#Ti*SXP6P~QN zo*r#Ibd;t`y5O6SGKBuMt6b6-=@f^b@cdiOla{iRtZuXM$#lvqh$Cg;yZd5iW5 zMm^91n9Lo{_=JZbq|-{Kbz{89jsRW5&D;_&5SfSLL<3a-{nNPx;)0OhO{s4hV>dF)?T;LrY6b z$^WbY^B)%%*H`ll1Xgwmh9`j1pf4#M*~uQMe}>J;Ra}GR6lvaP@dF*?of1IR+JdRw zpiM1kJ1ZArbqtF%?IBjE*|emp{Qmxk3Cpcp&A>K~92iiom;+1Zxv|d{O1qH$+twL# zs}G*HuE%;A+EF;V6gnD-=#P5B-{kby8AJK7YqGxDh#NoCnTTZ^o+&IYBjfg0@UXL=37`y z2z>|QTez_Csr`{F4iY=R}xo$cZ>jD_ zRPz2spU+ldz`^_am??|C$#K%w%5`}Mp38w$g${#}t73uPq|OD%dHcP#bIemX_LWFY z;sk6uPxv@54(|_EMGTKheAj;*{*f%tRsX*Sl(q;2JGqSVDQP4C7Yu^2Q~x_XNPtSC zu~Em%qQ^&dXq_-c5^_T^$$I!?baa$9z92UjZ@j|Jj`ye2_i>o;xy!I>6G1+72Y zmPPU-karGTQdjaTf+7tI4M+fUXZkrt%D1unyHy}2f7Zx~jRgAI9oiJ|>y!>3Z@68{pPXMZU z(I%in^|jZwm%n9Mfux>HuAiJu0r0Pgy@YYVsCzbSJpRZ;Wu(2d`lV31ATssu& zO!?fy(XMJTev@fChb_MdJ~3rd>cir|$q}YR;N|}G=U!$f0GUK}>$eCOp@aXRYVG=N zA6~nkLU^nS?E;`>l2aN{M0LT2j)rdhj4FkNd;D!J&=S2DVI-uN*7Yi~_bNs*K@bM`IDdhu>x?}g1dU{xPSLB(vy=^@ z(opZ|1LUH_IHc(>e~J?E#vz`@&5|H({lDkhs6iK#@e62tCRx6Hwt)|`@$3Tzs6Z6f z+Pc>^Qh9=ucS=*9AC4eQbm9&@LBWYCcG1ilX^vhC#RW_I;Z4kfsX~bvSq`$zS+W&3 zbNRb2aoik&8vi!}{D~p=&S6)gAO)m)Zc-kA%Z{LJ?bgGisncTvoc}_S)Ao^cjxBf| zgoySXVIJQqs3k!adi=5do^+oE5n})_V*iy)W4e6Y_L4){*&|HUVEK~ts)#S5UB4f+fNHg7)9*`u@SJ z30HYsnCulYR=52b*s+Bh1Zpmu_JwL@Q47G=`&Kg2L7BHv_BVAHSLxr=+i?A&y4?>$ zJ_^4w{^eAKCC)JkQeef;=a-P33jRPy_*_u4542Ib&pNJwB_xh> z$oW#LSNfj~X(Cd}Z^rCuBt}6p`{`tgp zA8+_5Ma6-4GF9hr{a2p=H-jWPBlu4i+>BQTc`^2BF5TrUuV{0PBmA*w=>E6dbPjq_ zpm)MRg`+s8`W}*WUfOM^3k%f38j2^1rIF8;-AMG_Uc9nu=UM(Wge8GTr>7l*- zM)f^G6mDS%fu^LOwY>0Hnia$5%}`@csa|qE`m`}y3Fk}ybffQJzIoIAr*gffQ)oou zf>Uv8`hv3xkMSo$LFoaD=)YwD2|`Q44VF#(yJ z{JL{AJNRxck8dKn6OzDx=azS8{cA9bn798up=V**aS8xnPAkRt05zGIVvHgk>(Aj 
z{Zuw1Wp0|)QBHGNFoeCCaSIBZixG&+p7dwPkCa+mR!A1cFB$#iKnIyTa2jL%XL8bg zW8!?m`)Ue388T0;91`>k+5p8bg78OBnIW>Jlla&#{`5FNqOiJ+4<_psQUcmJHsdd zC&Z{hA;lmRw?IF}=Ry9>z|o%vJPTfb)Wtu)H6C&gVaVk;L;}SxUzaog!wVk~T->pW zxzCCe#%&czNv!;aljmoVN+X9oWQ^O+>!Ew(wzGy8K zx|I#QkTurP^u3B2Qri%V>FTxCWmgutF-Q+g%Usn#W}<8#g^WOjY*Al1M8M>zCy@xj zEOXM67QbvY^Xtk3HenP=!e7n$#*l1X2?x*caKUvfN6KCuGv!Vu2nI&P|{OFvLZ@^tik z+7LUTGjL{29(-X(6Bv6N(hP0$JK2@+9A%y&z{&n<*b~ur&eU_3t}X)qX=k zBFHXIZCyMndzp|QK#=?}h{}a9Rl{yY8|O88BgMjaf-`{{?lTQnrKJL($IVh#2dz?9 zhqOu$M&U+pjS-clH)L4l`1An57Vc6+9W|+z6}35bgYiKo@2F8(nDkJwxP56=u^G~r z>K~+jw>cV>NBUBGhkGYPIZWU;PPa;Oek8RMf`Auh_+QVnjFoXD!FLNzsKyz^l8!DB z8<((gHu6i}hmm>FGDMgyGJ|k)M2AQucLVG|`Kc?*K>MeuMA_s>KFVBH^hc~fP`u== z&Rlh1_-o()t#+~5-uVa4tMPpr1yPRZ>Mv=QA->#=3xt-Zq(p*!Z}zN94vr<$tl1Y9(a1zVL5B}ol~~%pkF=s4xt~= z5h2m6V7= zneoQ>U~p>dtFX#1XSi&*)EgU{a={yD0wQIT%ZDYPW-3n=%v8vRk0FvP5g_NzYMhuT zP7@omv^_FU2v{$Cd_$0UV=cCBSW2zQ^lJ1UT?zu08i*1NiESv;x{#X{xcNY^As} zC&+;;37zwK#F`RkL1`%|GKhl$91q0aK+Q6Nq@&7d-%Cos%0awsVXkYxP6z5zB!;!& z-{fxrUB16@^f*emd8gBZRP(Ab2N+`+z>|BJ4P1PQYR1$-PvEAKxNT6HlG!Zh%ZTUI z5e)J&v8CndIhPLb4=@PCI}?aPtPZ5W;eN;$ zyZM80Nov3JzLDCy{8H|_$2gaGlGVuK+qMrK;ivd&E}KJU`}&xou4Wo(P(YaA1^N8G zz>}0>?w1RTBa&*$7=8hrtwZq|DgRx2@P$Hw7dR&$3~m^4PVMbAK>jJHw=krQ!Nwf; z5T?``NC4~K<1ah4uy~kvK2xm#mT}ypN6>J1c{lFv(KCX7{RUoK2Pj+k_fIov5@~*Z zewcaS*LuN7nCj6OkWshXY z5AC73D!i`!^(tn?LL3~0qVv;@u({gUmlhwxnY>wSkCsJIrQ4-9y#S_k=DSzDb6E)` z%wFqjKUc6frkMHYj;)v|KoM9?_tQI(@Oay__-vDLi7VTMWU9A30_XCWGYDHQ*xY*o z1Djq?$Ln}8r51DMc)MVc#s?ios)0f+3GdB@l(TIQG|A}U6!`~?({LrKptCs zc>=Kn{i_#;bC~`ssbv`+=`yuxf7%do;II?;b3u;r-~lb_vhnWSJMb&|@#9BtjrSQq zt`~vGg#vqnIzK6h5amrI@PjRy-ZR4iVlbF^V{iu9yO~uDu|CVe?Q0qOB{-_mZ?;rz z{gc%T1WU`ysNbRFQY+Vas5Aree-|;GDJ2&$emfgBAK_3&-8};SUC2A%^xsN+4j z7Dv=clVo7INC|X;ko|Y|bt(HXDqt43X{gh&mOo+9d-i@DIIIfEpbs##G$Os9G9cas zwAO2<)80?Kexj`nJo}zwG4<=~`&i3UU+TySXtFEk1Sy((lsb8QG6+fjV}(RJHACOK zg>54U2uu55@rRO9%O6a>84(_2Vg8tGTU)o+tbd(+z@;B8D|moE2@GFBdb%o!9 zKG`VTyMO;a2BUB0D~b#TVShjdcm+i~8f@lI0DeK-BKvREug4P(Q)6T~&!@%THf?9Y 
zfS6G-!IcYBKV8)AvERW!>|3As?^_ocf4d*HSKHU^&IJ)qQPj$ZfwB^vYS?9`tR^+$ zn>uaRntP)36mFi83kZO%5>hYcQx_pZ0>yw0kLIJwowZYkfMNAT;Ka#DPS{1deV-1r zQP|~fo*;VE;qMP@=7GxCBNevW#<4w;Xgly#&x&K-ya3osRcEsw`jU!ftp0- z8twG|e)U1H2~@tqUcaxS_M^ZwttUQWO!dYVpA2BR!A61eff_fGk^=ed>|bqY>HlSG z@v)<$1$1R57MEX~(0)FW;yvGxCKvpe1?D4k!@COvWl^d*_6aF|YU8!WWqh??C` z3l?SfmBG2koT+Mp4R-o&4K=xuPV4qoJio^i=q0Pm7;c&*AIJ|HY94rX>!%iM%z#E^ zb{D;fH_UH-a03(aY-UgqUN;j(JK70oH>6R?nJRbaNrH+#TqE-7&`Y zx?GOq#!iQ_|3&)mL0v0^PtYuR&L;_+}VR7mRVIzR-+>HACe4f;v9DZ7C}vs<~LAaI?^?pnF89$y|6`gwIGSQRkjWh@5DJ zhy~T1mRoBTsk$W!tYZb~zM_xrrEQ$<>mJqqT;247kQ3r&V}E)jYOqS_%|9s%2RKjL z564Y5A$rU67aWV1p-OE`BSXp*K>!$i+&?{T3&k9qWQRcfacJAkb272HpmTo|p&3g- z!>|3#VFf~D7005z5}}$A|b`9*rQNQ zf`)vI0g?D2X*~kMG7?kpU(?aiVTZo3f7fU7c}nkx^hfqz!)xq;bq*oLLx^w=l40q{ zo^jN^-9D8N;aStdmRA1csKgznnRDU&Ga6Q>0hO@t)GVPm9_HO*ZgMd2vQ2NX)NBT! zt{7pC{eA=HGWs>|={Gz@3ieYphHO-B?&NMe)8sMD=M(Oc?);)T9yqdC-7XyK8o4qg zGc#+QCtY=TGx`O{H65K}U=qbbC(5L!w@f#qj&qVJ_-gAgJS~tnRUNN8IzSd(!LA-W zxIQs@JCgP1OP6XlxBb@n|0pKB8)X_!x>;ZRL~Zf+TSy-c-DK6(*WdgdTptobq@!QH zRZ47glQ7xU)~5T$?zsyS3$ z=K9OPRwU+vs$z10;=L{so@=(UkGiI7>BA0{u1e?(zyvxPmlM8RVHVxioBr64?0&!E zIdY2A7?7fsFeMRT_R0IMYwOkB-KwbA$B;JHpWffEPOHSgVy$Bs{b}IhS!;>lU%5;D z*|Fhwos=^ub%{iMPNREcE34JsT$F0MoYtZ z{}>1%TgocU^sk*t`Q=_^r@8^Oj?%KqKQZZthUAm2+A%iTfFq9IwwrUC%h3-p7=GPl z$|mb@M(!EY)KyzZ#54}L7$|vHO`HVQNayn~lUuU1O5s#S zLuUM%mLT*2DgWoo$?qo5P`i;W+MXzwqK?uHvo*E?i-fIPo#9a~B^-HogVJvzsH2Y< zs>|fXHH*XVF5{q%k@@BO<{sg_Uhl&b!fZ7(G{Yp0Qe~dvROe}VAT4osIw$afdOaTb z(F}Rl(c!UK{MNDopmPOnax^bAX`zz&-)NyaNjMa8Adp`0VHe00-L z$y!7dcOl|_rZ5oOmN0o{79-En$%f&E7cF@aaxW`f6qKrHn&$({smTheWK54ESnm)ISXi`R&&LHY6iszwr^Ewp~aASLX;cHAWCcsox?ZRB4Rd4ZO#`yfV z)#{xK-#u&wcZi1{+`PWrJ7##)bigpigxf9Y496k9E5GY*XFPjG1bUb`=@>ItTndfe zEw!dyqwuWi_uTWl}^ejBZjYv<}3A?vpd-H) z%>Cc8#yYqUKVxkvrQrs_JH#KQiu-b}tDJN@BKd8_3;QgRqD@ti*1_xe31MmdsM)i` zcic3ElfE7>`FxrlvWkvfjfpRdM=vof{9dzJ*t+&pv*yhvn4HR+5a4bL_f3fddNv!< zM{W|5USf~On-nP05`niOKuXT11k&uQO1;6dWaoi+UumAtmzZ?ej=FkhG_Ej##r?;q(GOB7&nIpb@{^o|oKmr4*q 
zNT#3`!93J`BY)oNmC?eB@u-)>8sW^kWXx2ctS^diZ#5$IA&|YB=f`+EY-+)-4L;;^ z>pYcnMrUWbt50P)8O+eAX~9(Zcle^c4Lo&@xbsE+yM90%I0~i?re{b1R0aREdQ?Z$ zw4pm-(b2XcaOS#gA!58>1rXy!-r?dTmD1$?viJQSt2Z{@+#6ZzHnw=E7V1mR4 z+~i=Dfep!d{bRCx3yV;i_)ckbnNibt;3br*p_(kpGp4@0pw!``D=`$fL(UikYDT%jo^sDwaF9aYd+eaHiin6v z61hPGZp<(ACN~#b*o1F%9s>{qDX@+hJMEKR9|@-Lr9cnOFiK;6@VE_nlqdXMdWOaf zMdG|Jp#$60vvy}RToswz@ldQS50u<$wGQ8L*9@zqB`bpMZObKvj>JyrxY10aYv_~t zp{nxNvCw44wzL)GXwa!s$aUfZG_2X5E`f)6ZQnut#c11=-50WdXq-WhNi(bSLoEhu zZEbPhps0TIP|zgu;uA5^&2K0{Z4M$nn5LKO40X5qtEWbx$;exjHNY7;S~*i-C)NXY zu+CpDomOMt{)l!bYBg&T~at@l6jbaOiq?>ID<_oV#|P) zQ4_L=0>3<~AMX>=1(R_ffz_UY$|91Dgj{ueZH(&{g^U{k@mWNDe8CR=?5!y(Sb2A# zL-`myoclRzAe|T#neT&a5LFtH%RIu85LN_rH<_(bN^DM!%em(n83a(o2^n&1d3EhW zk@L9nKSpKb(6I&4vlid;7ZQn1Ap(%V!5$$abHb6d-s*?9{tssb?}?li~yz{N`e$oZ^!!a z!WaaU{hsdrAS!L!#XAVCXW_9L;g3o8+TE>58EkfT15z~r*|4(tOJ^tQ!Ut-a+dt@b z{aoO=x*SJYt(Qa6#mF9>e=P$-DprWG0q$u5_~I_)=0IZEC47p{@wSw%KD2pgefSa|PMT9=I(w-CQc&~Ez_c_m}cifEW&d7+l z84#&=WirIaH{s`tWkqPzUdQJ9=eP5A(OzSZ)}QN(ql zv4PAm#NA8BeBJjVbmIYp3BF`^8LIUh6fo$+B**T-}WN1um z(&Y?UD5n7!IVp)ou#eo?=j`cgC;sM+@;kk%@RO8X<`WBVKGCb}6V8gIIC|+;b<|9$ zubUtCcqJk%01=hcjS+4JzG$r>jK*LprgL(E;kpllJ1Sx=E@NQRK~z+bqk|1)*P%c9lnYIw>fqkk~;-QM?nC|1|%U|kCVA}eg#OAHqXCg z8Ej9-pT8U~mui|g>fxHAWFJH?q`%=1(;vAM&=yDaqT%STlTA;ScY}(vxDD33f<33Q zcNKhG)LIUDw5K2Z$P5lA3>1N1I|k&VMqpgwrX$q*aGveC`X>?ls$$}?ofBwu<)|_| zPAGtUOpEkX^`I}P`_3j-sj8{jE*qnF1`&X=NB3Aud^AVBl+A@+Gi&)>T3u59Oj(9* z-Qc?hjFEP}zs4R@^Eb8r6U~+zNdYolnuw05OZZSFjt%83aOvfry+AkX@94(Zea< z)k*@p+Q#x^RrAW9KWx)>A_D&Y`Q5gfy1GBed?aFT%$?2GJBR^-&5FtfysR#Y)S5rX zaz3b2B|bG}6H&jgI1F005%)2MZ%GZfigR!M@bvlEC4&nUb=J%tImNtx2C(?e|?)XTpcumQWOq0t7JHw zMBwK*mX&DQc#lJ2%gfKdxBO;mi?hvS$M>tdJNKSK-)@1otF69Y8e@<4aDi)2xzsM1 z&d*0he-;PnxRpGA)IKr!phIFwar=Vr;d>8~6eZ94b65|bqV`@ZXo<`GQe^(q@R_ye zywLnX|Bkq)c`HT^=~VA}YIOVlL^?H8xSu-}wp_WUS24sk>N4l?tcqu(rQSvx7^2Wv zm>UhsKgJbQu)Yz4Qm@c7I#&7*DVyIq2Q3-#H=pH;A0LbIQ*qc46(W-GT<8FSjTcMh=gCar{Rcg~dhW)&q_6Je7@P4Sd_`O1%78Oka=N!)dKifNx;maO_dDnE5GpD 
z8!N!zs&SoorvK1y=E5fElMbv@}9{R8DSgu&@*gbmNAT zQ?r}FN#T-?p|RC4OV4;Ux#;~;xs6j#f)sILU#66uoeNd+sfR0XEPf0TIUh1LKfeLa zlAfihpRd1xW8)>XreZ$pgaM0sM42Hl-s?SAicq3Pu-2A0R6zCYiZL?Sbw()t*9l?D z{;)=#y@VJLXGoM#>UKWi<$-o;SemGIoEz$x`FGkHfYu);+_UW8zAUMn}0(Q5)HNiY6akf@FL~0)j2p#;8#~|oM-{f5> zMqQAR1|C{7=zug6x4JW^PCaR>Snan1SsR7%?H>sSl^_7q%5L%PwiSiw2ZLFMXYH}y zgun}wH$0c!kJbT0c&O}5sn?2a(uL#3?tRywtuo6WV>s}6OXR7O>>^G(J&YS_qdUtl zusWDEsNawj9F$oM>>^M9b&{UPxa#3_K(1G_rGCVL?&DQDJHGFEqsjR1BTdXEA zwL{w;DDHmh+1HTe(xc?*l`R3wA63MiA6yV57no|bc-^yAAfJhy2M^f(HIjO~Qe&n~E>B~1dBl%}4leHX%h7z>zzhCp>*4Y2}#{LS&XVQ9R zTGcM@Id_KyLU}k?X_aCJ;Ng*#NN0~W4kA+meDY=a;GNU^z5kxY_)m&F`TG~7_%wrF(DjBlY%;dq|3oUPRe*HT|xPFoN2Pf12%nbhRx=t~iX4rX3$Pd^<(l?b8-WC_oK}wn047e$>TZ=r^Ops~Bbf}r00!=FMx@hdWq4=;vtjCI4z)~yiVW;09gYhxlk_rT^&U=9E`D}bjqhaTn_Rs;j-X-@VIlL`Ss#yG2OvYrap2W8 z^{4?1eq6Dx6}71p-jTKzZe1Q{X_d_Tc9@V+co2I?g#_pB0NBzB0@Z>ZUJ7*cfx5WI zy&U{dqVE|_M;tB6zCNHoqHK(96DLu6?o@-aOKF&rh?iu;f*Tgi6Pf}|6$LIn`k#7# zEMH9^2Ixju!suw@`*4CxIL3iU?%*mD+oXBFYqzo6h8QA_E})C$sQe?4KyckAKH{Eh+8tmNzfBg%Oi9ksbQ>sT0s;|Gbfb8 zBeFXbrv>JhLDV$@XCNY21mndcXd?$(T zLBM(2pNM_oMsc5pp5C3MC70g0)iO#P$!nU(>&*wDlBj+Md+&7Wjf98vG3=2SVr6#D zLad3rW^l%`s;UuoMv$N(BL}2a2U#w7Z6C16!aoxFPOBD2Z*3O|X@1Fh+>4N`Ba&X5 z?F&YYw?UW@q?N4*tz$YnJJWy*u=LjX*U3D`z0^np&_~FfI6T-4h3Gp&9Cxss zoVzmv%`B9{|X*^q)`5g4L!oNzl3U z&*46f+QvSH{->}@`AYz{sg&F|o=w4k1*4M@G{V6R<_HXlVHOYa-&szD`4l!hA^hk< zl8^-fTKhGov37+u^f2p?szBJjSBP20Z*bBTtSp9vp0xSpCg4sV+_z~ZucWq!bg`uu z*MTXqxI;<~%P%V{%M;K`;0V10?I7BI5TFS?=HbmFaqFW;5y%1qoWH4&FBJ%KBsadt zgIZv1auR2jN-KPo1`%@iVZa+z`_Ht~F0m>VQF=62L~FcFsxLj5gEwI{GZEI|0~hM8 z^*x91rUzhh4P`j#4%(Ek^vg;HA3r`0k>}e0FFQa`}RkoS&Wh6>$in_e&)E< z%PID8;_0|Cd8wv~BBDyKh518sk1YThq3@CtgjlzIKSBfye?jP>8S)o|kb%tVRcW}s zs)EV>xM9^StwL!Xz63j69P3@WZDl-GCZn>FNy_VYJ$OGoc1VRDfEbig%}M-%_)Qbo z0qEsR@3or44N!j68zG}F;*u8FXdh~1D)EhNXB>7sz$qD|ea?a|rl*BBK@I%(34rjj z;%10jxSV8GFjbV?-Nh6&Z}}q6GC{$1hkvA`Sm@~J42rD}IabYyAwwjJuuK!zIcSrN z5rTfnu>;BDkl~u9x&q5-9ahXD>I@(S?I-o6{=P1hMWvH|ECuk;HWu}&flQI@lo@3n zL%XWzCytfxO1mn7a}BfR4xk@a&~6=f 
zEwCaOL%b3cLxXAB2-w#wdlf*2K!5?htE8g`vd!>xlMZeO*n{R8%yyQJ6wX|K7qE{| zGKTG(J2})CV9x$pVBWcBelM(&C{ljZ#Wz(QTLu!_NckV~o~FMRV;)1?uZgWTMl#;h z(+Vd0l+u@6?6B%iF9+uCslApbg&Cl!E>D+a7k%1EZ1Z76L499PmR;QA(T&|#zSMWc zd5a`~k9gw28i~v1xmn0Z@w)P^>msb> z=jv<#3Yt&x0<>Qfy0Ig6BbgGz9mao`ek=D=Lf6@==kpIt+MQvLck@>~n0n49GGl7_ z8H+f6xiEEKVKH!hMlX{hB1;=M=t=zN7{7uEy=GMnDB#g{^QySmXpo18hXknQ^+2Tz zM{On%HXKgPRRlXIyWaFXYATH|*K_j~gmAAf&Dvkz9F~q9Z*NZp2-fe>i%h=U#uwhy zO0iC_Ydg3j$z9`jLuXb@xh+(Tmg_kxO*(pd zOJGVn%)ij#(=_N^cWeVSG#tt&=N^CPlY$ZS8gvw5z+hg!e3_rp0*Mijo**%5fqY=F zfa%{c+tNMJ4ZG9_#{B6{c&0?H~aK@#J zR74kM*uwP7AIj0rwB)%{n=xY>lA8v4R_^gWNb&n~!@j@d06Jcjj+lf1N^@(jAK1s! zT?mi@s=-}mahl6gE3_DMAPR;@MK%5MTdQ(!6NRh}Hoj3Htsxi9I}|L_Wxn9NhY0{Q zz1%AW?BT-OkN#o?*Yr2siN-q2Idwzk(j#PZ=DuKfjZgiXLiDfZ%(O`TLXeHSWo-3;l;E_qr^H#;Mj=QfE5t zp4tX*q^MuYgY6iZ3Rx(@W$BG$my#tyx3+xjhbrlyvyT!8+R(qC1dS}T@uaI1gaQ?C z@-D&#o(>2IY8oR1O+j!ZR#nvoz~}~7Po6y4f>g!uoOK^B|2qc)4`0N2A)PD4)#)4T z&Pc9TKijOnz1%+t@G^AczIyGN#m6LJz{dugaQ?@FM;f+n0-A!HL>svL}q>)+%$&|1BsseW2y|>l`F>6xQqa| zvUzEgtk91Ap^6Kcv!s1G@OFD&$ZG$OrTG=uW*{H{DU4@L9e^VUia`1{P?*$S!ZqHb zU1|TIVFf-UE&Py~{feD&Hoe1i!Z5WigeTGvf=2}HPe(`WNmpy6*B~Vz*Ss2LfpvG?2{59ikn+pv0+s|CqT&Gc@hJqyzpi;j2t=d#X<@GO? 
zJIGJ&39`^(s*SSJ=jJ4nxJr1#C!6c%gG4pKF&H&boVq0e)z^OvzTg|+3MAgP1CZxNbE`i@jy&s-90tO0(wiSVB3#JYsSP=jo&l|%w`AYcT zsi$*tu+cvMgn;00JCk+|BW3q9Krzt37pu9FdK}p&!;eXOtp8R5yjZYuiWB5g+(~=4 zaV+6Nklp6=LPLRCPf|C>Sjo3LgPuyH{9LwuKnzHOI2tYuCh((~vC;$>sKp*cN_Xh7 z6ak)yqiAb|AU+Nxbz_nLS*Y?eLuT!N=8V%xi4y2sBCbzYI2|f3J*IVz(S4#IJno;t zUzF~D$P3ck)^9~dMFoGpPQHr#lNA-IZ(Oe=1ED4+AkIY8)5o5#DTVecYB2}s8P|yu z2QbDTkjl768nkasa8574u^0AC`S1M8xK5iRO9dE;cT(G7fc4BH9^i`gJX3)k0Ohhn zAAsJ!j}2#8zCW{dtXeZ*+0>b&e-alI=fHv77*z$Yi(mrYzgH{Y_F9<^g}9KYRP5ht z(47cm&u~?mF8u)YO_bw8eN#ol->@2hVEf@(76@)Eg!TgE>(Fto%1i4Fboo$pc78e2 z(at^K5AA--w<*}9$sb*y@9O{0iJYPNj5sL#Dd$H2HEcA2fYvW4{1ToV3A_wx7XoBZ z$p|QvU_UbtIfm?I+?rKX~P(1X~-s>zVSXyT3ojl;7dV8?JaEgA<5@FP{hDd zfQyXK^>|+?*|LZ8b{3jI;&;!}Kct0u^gWcAb@*1M6>_^R!J!%T&(gedC8CAZn_S=6 zIaEf51V031hd~N1cl)+D7+&yW>Y<3`1O}s4a43OsHfC&md=VfQh2!y~Nd6z5`=87> z(m0NlG^1UBsQ_J+D&t3AJ0zPvrZSgll?vB%bGsQ%Ta#ONq(mbZg`N|*$%KtyDrU*0 z?6&kNpa=I`><1144Dv`%l5oozaG67?6Jli}8(}~-r-rJ|uzJr5BePkKygimZSkJus z*LtRB-m38gRf+iUNaBWC)InhgdU>U96EOA-;mc-MH+~EXv}x)%4Hi!F{0eO6 zf3#fr7z#*rW>o;lH!7T!8rTk8Q!!s399@R@;oyfYGG<2$KQcMWRmDot*-a*~N;X&uPcnFh&{U<-sLy-e`!)sHDMHkq|2w8fcnH#f84wBVXV{b26F>a-M|B2~x#bc1s!V5Ola0M3YlTL;D1gs z-kcCuc3v%obJ>hn{G`}IFY(UDJ_O)uj=H^LRdxh&vA>^}=68aR{F3(TXa}il_3v!e zVIR%$m`wJtggxS{=YsivLBNq@qpYNs*Tdvb=&Y&OG1zk54B0G}((v@Z^)zqJ1b=`s zoAjKdAoTlr?^Y$O!U72kY>XG<+_Q{S~q zTvPc@wwK3T;6FQGz8prgSA|f*5|z?|rrYi4xhoEDziKywr3caZEUm50;a%k`R~UDI z1Ct4!_F!lVG>SIqtEXy7;`TO_K_%Of262)&86`FL@Z^48TF%iOb12CCn|E+Z;F$U- z?*TFX9kncZuQ?d2camcZ^ePM0bhR`kKjoOXfR)>*agK72dR7%8(?*w zDl$^IX(ATg(akB)EG1Gt;Uw_e+B=*tqrCu153TL(%20|z>DKlXxJLg6EXxo&91$4y z+?8rW{TOi&gKehy_%`*Hc&iNO&;FHTG=)RYT@|Ck5BWU406?VBzU9e|FvzA6-3I~( zOMpTd#OxVj*w$)^XCPO(8zopt3b2$STnnl~MBq<0nc__W_TkDGFJ6pliD5jwAF$b* ztHPFVNd0{t`>LqbWV^%1>u%=Vqx~?v$tEZ&e0p|tWsi*rJRHi1xtEw2ro!f1@^<_el-aDigyo>|A zu(~z(A257VniDI_{1kpT;UV1Y-!EFjbTT4pH=Sd5`kul-%ECbY5yx7gQUxDu9bI?- zsY^Nefb-PCjh6;R(76y(Z*;RJ6pY?_%244CAV?I<10}z2;`PISt`NY9gVIHTV>ueU 
z=;y&0#uCh5&w8)s?aEN2*Uc&Os8gRL;p6s)%^#b&79QbfS4iozZ-$Zq^i`n#Pqhde zAO2mI%FXe!)7+^p<-o>GE}seZVK_Z!o(A_gdWN^wPr@t;2g`Y2a1#R5Y**$B2342< z-CVuW+k!T{zoA^`SSA2fJ;=lEv{^SH+SJPo+%`$Tdx`t4ZjI$&bEIwF`rg!s;;rwsIG%*d<`0tp3!J@Fv5Qs#A1BF#zKi#>u04&QGAUVD>b>EeG1 zOjoc67v2KMx)0M7M!ANX=q4uiOuQ!FPl#dX;Aj|M1*vPfc*hDvkya4n0bYj)jBq5t z6bl!d#GQOgNh#tivEkVT3u7MC}sa}xvV42ofZumg&^&xK|(;H%!@kcz6` z|F=CCyynlxnF-`R_(^QzDk8ig9FZgL~S~5@%&|EN+lU?suK3QA#IjYd%*4PYFP0&XF zrB)~N&#VVsZfO}0nM8=>GyeE7k=8cc7c$^$S3bazpr&Bhgc@mDz9qZxSmeO3nl)Ac z=-J~cP71N)0ix4h{}aCYjDSGGoktc4ieB{Np%)$X#&c zw2Z&@TAhsmpobB`)3Ct+OJM;DMqH;)ABH+t3s6fpSADf`E6wj-X1wJDyXcOPMyQB` zqTlPw)Q-MNH^sHFYe1F0QX?(Hs=Mw|LLWF9dan0nJA>P0+L z`h%O+XVhi*PGuDjj^l@dyqRcEfzP(3VCi*G>+kMDo8Krv)g|(v-ckR1z7ygPk=*w# zsidrG(DV)+D3-b>@9eK}^&l{qJ61nMv_6z+&+3d;&TN^3POBKIqeo}DUn3e37^nDb zukJGoBvC+6c=x7LiF2d;;#%$Fc2>Prl)#C?lZhG-bm{ zvlVF%?I&jlfKWQ4tgP&{fpcg9s$Pme(Mi8=!=ZzPUt6SP#QWxclloFfY}+q4UB-ZbrDOz$vATEg<>Rba|9E)_zUB=*winGjsJVUz`+kXKjkLxK=%|^f#bPB;~ z-XxT1l~DNz695G+kL!N8!@~cpj$^aB%tMQ|QA6;jdz0?sidE0xbq6EKaoFCGQU2!_ zm)Q`(7ZU&&tkvplAByG?M-ZlJK}!WoVFgZ+|wemYFh+Te80y z*&|4-{%jE5JIQ5u?{s9utUe2J7#IXe3~0!4jABk8cY}5jVZ6rU|7CAi+PC38*H8SU zZ9>c)M7)FGybr3}o3$Iy2g-n?RK4=msO=$d*UU-d=PZn^_-iO4T3JI1(!NTSi+ zb);!1fmc+-ucpe!A5o-3bTvid)v{0-0!}1p+9OBCk$DMb3sx}d$>0n-Ffg$WB0z^o zj&#o%3hEB3oZRqQMhJ-y|BC(eQI6%vjh=$Tg(hTo&R5C6b`cYfTx3v5@!-j7Dw*xy zhf$Ua^@aloQs;`53<6y@^dM_>*oAT{qVEFL+R?oLFm47$Mn+V?b%tt@CHUl{)d+;_ zMO22kuP*33Fl;U~0-QBP2n12}cfhgg5bO)YAaFoG!!9k&3eZcia^``l;eBYR4#v)TUKkLFElJij3fkRaT1P6`bT#T7jKch~%JJO!vKWq`WtSM@T54xcXe!=4>N z1}V)Ac%l2*co`rSb*ES|fg>155L)$0(`dbnDA|u_XKA52wC8m83#dgcKrIV>5*TyU z7uM}W!XfSLTY#vQkSMpn-w@scl|$w;L$?m&bNhAx>RSI`wi5(46(hhZFsStr z_uE=jK_GOT3Tp#U%xX*tQqV%-{RDPtTxU#2#maUnSZ$)qpftS{3*(BoM2K3um{_+( zjJ9kjE&kgMLMiF|A<47C>#~7RNR^yOTOEhP)>XrEY@3?ZH6#~;S%LtN)dD8y1mz;A zAS2)uR;;p~o?fvpxXb~E=jGLR_pJvI(+c(ef_f$#SG;_DjVQ*2tjp`^88I<>q_7ur z91L%4lm{vqpr}Y+0BP{4Q>VARgK_(j8#wz^SjaLzE=X}Gjana*y*L$_fnAxD*=0oL zs(-1`inQFEQuygc@GExVad3ZFkK;PR-(3=C?H)r80L 
zI@C(J2)c#fs09e%i{Sp6nU==h#f`f>%#D!9hxqOAa<$CXX_8P1Ak91uZbbR3xJrcu z{&TVMh+p(l4Hzzy5J_z8h@oJ^1L>EJR&;Lec{0x_ zHI-><7vYp|;JC)gZr0WF`dsIGM^@KZ9lpswj3L*#aiNf(hT=MMJZ%iq{!L#SFW8XcMiesbz%)SDwNJm=@{%S?))-_H7%fij=XwfZcNGMnB4(E=W zf61K=_a$XGcL*V~YJncNDAejYVA6MbKaTeVc75h$;<(ucwcb&vWGg|i?ff$}TVO5bM}){y!S!jD|wB? zzXThL<64B2?czRf&8cqF07z(BMN1avmQQ)=0T)tu@p=0Q|sL>JCfj&4NR8YHhsHH2ysX-pF_33G=R866#x5}^dQD|vl6^P)nO#pjUNtE2_ia6c$&nvCw%Dt50!Uav?4;>} z5gDjOIy!(+K>;E24+V^zfO5ehEc~*smLiOc=&v27rVVwZhLVafgQD!!{Xwur7cK0N z4l#E*$GOj?@M{&COfr8C+GCbo`sWgNs8;jMgRTCMSw|~H^Tf0KGO#n|aHJKI@@y4z^`+O}-#*9fDt*v*0_=n$U z1A>8MOe!!^(2lsDf;oJ-{TwA62@>GCarpc69u|T<4N2%dZQdX`TCZyNCL0Pe8A$eQ z@FxwPO}MvWTe+$f$ie=46!ajdjQ~O(K!AnQZNU?8ad8m>`=|j5UGjUKX}0~n*I3we z6L$T#q)r^W7ZO6xPDIctcY`lTi<6U2Ev_uGg4Ru7>U|Jr0kK|@KoGWQxRl12Hco?9 zL+^npC$E+cQFc06pa#u(j@NZE>uvui9QQr7_HA^RY}mRAM5)$^Fc{28vk1U^#EtOkx1Tg{pFla$&WB9h(*KUwkYnc1gJnG~JI%h?jQ zrBz)0#RZy1E~WAgLLGMntb=ty&_CT*{6czn%hja9Ne=MzE@1cHf^0zjzJ%NKv9KnK zrR(UUFadQh)!17L8p5WM3<^^y-&%$C3mha0)-cq&GVp<5nK0{G^VU7A2|xS70d$Yo zzQir}`$>LT*{;>T((g1?9l;gUeKAQ$4s?LJ5U7ek`GiHr&lrFhxQ~^SEaEPT03+!G zf~Q+xEM6Q&hAxWPdXPXMN5rN`SDnU(_jct@pd zT_1984%a>gN*X|RZLFO5+gH$obN1i+*A$k~V8|_)OX+>-p6pwnk|!Z!FyI>tUBKWa zO3pjdWZz>CoAGYCN8>M^Z`>s0tW%T$ycbhAz+<5Dkp~_k z6>Y==0m3Hbe_l+8E+OS5A~y5mrzcdLg=FrrnXxX<>c10(*ClqBu~=7-&JFD~Gukj%$9 zLT;O${shIjjE{Y8enn-8QuukX)Sadl-v8!-huwhZ)YH}H){jIud#?b=;Wxx1(@>o- z1S^60*ORQpVQ7cJVC?G)AYg|%r~Z-B?swo=rhEQplDvowi)iV*Nc3ISIf?H4po`U2>*qaykQT1c9tY`!RUe#|E>9O&$+V)8J)*BiOIO)W*7SG^HndMxuJ;rT}%bdx;lhLBZSov8>7k6=J3mgzV%G-%Y9+p z`0Y>wOX0?c)q88+v)0`dti#V)5x|B2<$(LkC|z$L@g!Vs^7gHlbT1f+SD}3E+tE*P z)7+ulolQ@E`|;b-$8E?(w*p?NH6;oD_x)Ojanl?&pky}p zJlM>IV(DzlMSMLQ`(y4GP4~|`e5Cxvft%%Nt$akw^aS`{yf&!@u^OyOX9IDnt3CbE zF%c`%8|JLATKo(KG@12G(XQEaZrx4*{|o4QJx##_++)**b`#2V34Xl#b&HqO?p$Q! 
zw4L`A3%>`fU~U3mS=J19P6>X0EJcp~hQahcO(vw|iDl*c>nYJ}0G5!O@Yhr3Al+_PfhH4`u_cl#`nZnmGMYNhZB>6*Ehb2z^;jZ zBIB!i?5#&XlI?F`nuU0Uv+K^Q<}IN*$_Ftlc57NNop8rk#|8ZRe_$|R_(Qi)OT4`7 z=GwCpKL&~aeoz2^ax$UZC2OK!b7|c4_ex^>`V+TU-B#u13<>dXgnPp2ESwC_m}7QJ zuT3&MuG_v#{$-2b1%tVLOI1-$Cs?J55O-r8D66f3+RgNr7qOOBRzIB1ey{j_gnR>f zA8SBahmMuC0FdWgU{V^EJDLQxfGk80U>AKju+aa3GG!WSldE6V1K+MIu(!VJX<$!^ z>Mu2AE25YPTlpjVuD2AneT}NGviJp(-Ic?T7vpT<=upS{;nT^MQx}=2ad%Qw3BtD- zP`h-)Htz;JAWoLk5KbRrk`S6R^Udn<%0J= z7P)aBWFMODCuyXwCCJ`038wrk{L)m4EfV4=BYHUaOY&ZoB@$1NsC~G*!J%2D_jeEz zC@C~Lj?)eGUrOotTwc`y;RS9G&98MxF5flv`pLaA^;tm_l<>E90S;Bak9xF;+V_6y z{#NVjEd^LAL{kr>w-yx2t-z^)Fj?GH8OR!1snWj2bI@h62lA!Msvv)juU>R9KgCZR zL@J$jEKj5ATBtdd+Iuxv{=#;C^*bLQvt7!Yo4_Bd_5A%^z_Uov%1JoK&~|NZI>DcY?&O>NePJz!NV3=RRG!jgL>Le$=+=p568N z!WY*VB>P!#qw(M)9N-TgJ<7K2&N(3__RH$UK*$lE#rEgldiO5*|xO2ANYuzK?__US8P>@V(*yr{NQ>)>+)vu)rqT z7~K8ZG3nu)s%z=og?C*KGZ}4liy^$kuP^t6r%(S;Wo=5i52^HXES5Lx`SWQgCV+i{rI^DEtE__(>Vcu%(QSQ3088-AA& zblNqlWVkQ>grUgB?QSe>LGz(1+xpf6kyzT}*HV`H4b(bSvRcW!yp=P)G49^I#C$F% z`=n$6kEy4Lrf%+S2jc|3hZosSoS3W;k$3HVJ)diohQdtu{&$=#+Ir}TvEIv`hQIPF z%e-BDhHn_<&87S)>g@Q3d-SH8nT}o(Ti=ZzOUzbpOQmG=t}MH{xX>~%7>td-ZbA_q zrV!#PIL+s8YSC}GD>EKZ+c{2;dl|;BJ39q;H0$I;$u{N>)6*YUm4F)IfG{zO$A)lG zY%!1a?~U)1ZW2GRoVi~s7?iQb-KTsNb@QL2JJ-#yn|kvy)5~V-v_yi-VD9*`5?c|^ z{|fcIPKS^Tzx=ah?yx2>j}QBE<%0;is%|K8Z$02sbMqD^t8_ex5Z-$GWduACpaG6!=nQH8}aj%b4zJ-^xu16cbUmasm`=a9H zi9MEW=y^>_+Ak-idW~XxYq|W7uf9x;=!~6Gi)Zb)y{czbY>sYIwV2rHb=RRI30Ic6 z&Q`m;Y`4hTkQ;h3*;!fsr+@z4`)jKP15)YpZQA!&zNsxYZMeqp=ot@Nm*{ zD>8}sB`b4ZID|W&sd&9^B+cB}4(02?Uh9+2u(DkFnpa4t{UU{e=T~?9+P8G8FiJ1C z>e?y7)%q==7nBcDhHQHx`a@6lqRGCwAAhEpPCA_x$-8*%kUe$Q!o5ykwL|##+6VhF zm#T#4r)4&mFJHc(eNQ(>=N;X*JcE}RoLpQ7CENR0j~tnteG})owDhHXxVfXzodB&%k3NdjDjf)=7A~nI-sMW$$&U`2KLn3WjFgvRPT3XC><p#StWAzROQ6MT&H5RS$v!nZ1OhMANGhnJg}d=2REkAnMf z%tPH;pWALfJC*K>fZ3_!061!LtAZ54$*vBdfuS-pTPS3|Z*A285Q&b7X&N@BsQ+&S zI^xRHVJ75Fin~O;GPE~8q54?JBFJxd+e3d7cE1%s_@RB88rYi#3`%t#Z z1t6>dda(SX>SAXEp}E~IFe0YG45SNMO?2hx@~_Mf8KLtWc#gM58;LKbN}E7Y-qBa5 
z!1xMCU*aGR?}C~$8YbAD^x=I#y9vQ)W^pYLr%U^E^HTWZvxYN5GSxjKAFU?)rEdSe zH?WpBx3K@KyWN@vH-4tBzgZ*c_}SgvQaU;!{)6cbjlYyYlh8+2{O%ys0H;4Hk5|<` zt>daFnFC*u|3loH$5Xw2?c*ACnsiE&LIZ^+4I&vTM@5sFGFLQ+$UM`)X;eGS5uuEm z(l%x&m82+PXC@+3lp(Xzka=5=cHoq_kO?c`(A5Z*R`%?m5l+% z$^f%;!GE*hI2mJ{VT8|v7#mfX>Ui955R-LuqjX{lEE6jp-1F#DF?R2|Ca~^s5NJZw zQW4R{OH^MQa~FdE^5X7M9h_{$;1WwP47IMGLi9;)-E>dp>a7sll4NGv&wFnQaG`f( z7WJG50MHEKsuplHegT0uFBJ0Y2esiJ7|6sBBzNE%DUexBYH5Z#vH*=w zG5FiChYu}qn+4>Ytmme!@xPwID|D9=ePtC}^isKr4666`(=0(z(;`+y*-ha}B~EhX zwWr)b+TerfW-H6{4uR?}I#vqXc+TWv1>aa{xXxq12^2vD#LAI|sRGI{Y6N=N_`_Y}f#2vDkKJ1+*kj?Vn}#^oS1ie~~ji78+SCi12v zmh`btYH;qoEph}M6GrppIakpBBG20wRpGU-GAMWd6dX+z)BZ0w+Ad+nj)VKN=-!%D zBWX*7(ZyvFBCl)uDkVdcGw$jzO>fFbeEI;*lRd=ZMX|=lXsFMNPlI_CBlOIq+1DfQ z;kZ*}01z>V?BIUj=Ov(ITS7u7jDI#Gl~ZCpn@S~C;HO}`*@eHo8PJZDn9Zm4%bu+* z*>}T@&<-XVgBPhB{rCXWOY^$%W_SmnZQB_buA~01cUawSeMC5Hv*d&uk2N`!&uT@t zD&D7AyIIX=QMeQ+3@BeN;bc12UmEAkelUeMzxnXKgs9f2s{BZ^i)zZJ$KIA%^GW5z ziN_+yz=~@f4)w!-Vn$kZBtJ^^j2u}fSRf*K)K88qL_W|()WRWzT@4&DHqd_X8zRSBH zUcc2SkE-!3V{2bdRjV+Yzgn%T9!YlIp@yS1m*NSw@xSiy-LHS9gjRc!Jk}?}uKQR{ za}!xrP>-OpvhopVuSFI3xGQ2s{OHYLKS=S(Nbo{f3HZ`bJ&Ld~eFEslJbtycvhKKR__(zCv%_ zyt!X+7<9MOF~`c@c%-*sLLt(jfsgYl&Q~i&YgFA>J9$#&t*Hrzid+I)g63=tNZs-(&os5tEEzWjvO@8g%OJC%eg!-FmR;`GFq|jWKV=*I8g5ZTJHkp*bcP)4Ywf9-N-yN zuy9&kU32QQX9k7Lu0Ah&6k=>%)mL=aPnEL9a}lPUnyW?B9M{?L)e3I76JwtfIL0lL zN@ZnDrr*mMB}?Vj z@F|tg2jZ-R#S}Ice+&CpG3pgz6{)$IC1Km}%hEcV z)c7={26k8hvUc`e{O>wdgCnv!}X^bK!^&bTwFAZz&N&W*q}us|Z< zoUnAs5> z#RFc0I05hKmA!jutWLS^O{P`Y375}466Xeo9s9x;a*aD+#FWa6pX_zVda+tn@pooEA7uN zweKF~`nZYej}f$Vd23hIjei|^vwU}bo4 zj?eVz)5}X|OrL&m&emXK&bwv0>3~1}-OcOP+nd5KpABsCh)z#1#yK@l@aoT>^>GS+ zhqW^qE9(UIB`n<)8?)hob4K8LR~gQW@Wk;%adL>v$>#6;KJWLaKH;#pY?X5BbKMqt zFz&wGBq(>o?dvhp#X{`of`|N_pS@2|T+`Y7eR90-2r4(Dgd1YhU;O7H;X3W!#=Xf_ zmK`4QAuaQTBDg7owdiAg4*cro-D`@SxFNNeDROkSSsoK1E3Ji#-kNNfd2&o)hz$sh z84WAHmdPK%k<0xE=8r5mKmKMq-6^!%y1WoHbO zIuh<(H*G)c$bA9NhE<#wXkSwA>)7YzjCb!EWdY#VTv%shRhzy1V{Fs`gK>!ot=Vf? 
zU4D~)A$B~*?f(or_6n=0T(IRzu(y@vq=b9w^=Axzy@c1}x>)W9n1c7msn)U&dEmuP#$w}PPVkxDfIMqM(%g(ts%HFFp624gzSqWVc8KAE zmVEWA4|vr1vIJk6eqp68M;S0eM&#Pc%y0S_eIEceGvO}osfYaHEXkeoVI54ro;Irf z#(|{O_2j34$=Hd-c@mc+h9!PuX8pFtkwr0S1Y*%IYg@Xe>*JMw-Pz;uw$(LtACyF8 zFIA73xZjr&zj!@o)T|*3beM+5U}btHaihwy+Ty;Na}%f7p$h7_aNs^|U%RGZ9OxOr4l#gqHZ{jao@I$w8b^E>5) zRB{hL6Jv+Tat5W_j!c{UJgkKufOU<uG-lsA#HW*zLl%)KQho6S#4sGc{OCgM6Cn0e2{j;yB&YAy~@6}iZ>1Kj`mG%stc zzXn>|FMPb!dBgnj5^lB3<9uu8gdgMMPi~GIzY+5839R7g)rxQ1#)KWf75E2ONXsrM zj%rvt_kwcH=xK|3B(u^kDwlYJ}Sne5cCH_WX57bwc(FBLUb zXr*%ds9&K3dA73IOAyMvE*_cb(M0UMGi}anfF(c|w2y>uOq?=Bd*{vzaPE1ax)#E`Q9EXzHb7LQ?^oa$umMHnY$t;)TTYfHSVpzCTZ6>ji`0S*JW;g|V?lte zY*GtSa=)#d!V1XEE4m+W=ob*r^M+Bxt?!8CF6dO(ddtWED$*i#DTi>1TN;%|RBF6s z_1eXYl?+qtKA={=cK!MT6u}XYM&5=!6}W;JloR~D7%TeYtgPMFASli>Y;0_rD$`Oi z)SwH7vR|PiC>TQ;`xtG17$Wa|uxv$^LaADSto@tZSOHxArp+!= z5#gY^^~=hQ_bgk=mi)y`wlDeLqjUCk?|c9_y8>jj8S5|^77koS7^ulppnB6S=gA(^vLv2}YhK<=yFcTlfyAwfZI;EFN# z09RZQVIBGSiporY8flFJHa79_dMMS~>Gq zsYiTb&84+}Ie+WW_&sMtsQ?_&H3v5Yvtq&7jkvI+d%2=enUC ztYak_a87uSPJ^x&GxmIiqwh!9O$Fg>S|;}O^Deaj$Fk+B!{7jj+r#Dj!O03G)xENU zJ>D(0-HuWZs%TznAbl67Z2oS9)gxzR0YM+kEDu{N|+lgx)%>3hmg%m zkA6$S%?Qta{`@%=bSfHdZci<$AnIBsBJu}Rf^4XfpB5cOUs>pz?p{8t5q->?1-B$JC@EXAComdF88OZ&V9X6$>4XJj*$MS({e~(ST-amTq5vpMjDjv| z&15BLn05yq_+L4IO^rrkzpjgi$6#~Id?y*_b!{IPo_ z;YavwL_z4gA%8Ui>=4~VpbI`6jde}%+)r_AW5WHS5j|KG+|l#-rs)fzeqy3olAT$> zwzAGcTWU6P8PCk6Es(f$t80usRejcw1A?0P0tB79r$sF_-Q?9k`|N}%ZazLfM|*#K zaj7{oTPt|WxD|SFn_y(mMvFLMy@qi1RaRhQoK|7<-nEt`kCj25m&PCR8|r+hcm&|M z3xtV3L7?wiqfOP1*raWF<`$(sGx`oTi@&S# zc-2zFz2DWt@dv7F3KUdNnM_-0_K=DP;{zT&>3mMU0Y}+0I8af~q%z#<2&^O*sED6{ z`7kp#CkTk%7IKM>mW`OCFxMye^y$-bWz<9xqxWbFtiLsNbgo0S9SR$cKVtT=Bjt}3i?q#-lZn$pxBYfmCE&s2YK%bYR zt_(_Q(p~xR$L6V|V8|ESnEkH|V?76l)RCM}Y?2Mfx#0Ex4TjvBU6yhx$2wB+Fv2KeaS}NlXu?&adzWGkuD&3kfL#L(|;j&Od0k|f426uTnOIE zu0E*!TP6PVnegb%yVIwR|6d_Mrp#1dCHFhZ5}DsKwxt@smRk3d{c+7?q1LIKTVk^9 zpOzE4p*R0D0j^hHoUm9_Q-ApNU$;a|oD13g$c1YwxE6b?vhs|G0j{SuY 
z+>cYOuF)7#rXpwOhZI&z$Dl0tc5DN0OYTw1(MBj^gAenk`u43quchXdRdoHTh1hqx zxmho+a6i!k#s3HW<}xEYz004Q&m^;26>f#?+Lm``MDf8>mmSH;7A3FF?Ff;%Dg4^6 zh}}(9ESD|n@hNJ6jN>?W$?>Qd#!l1?`vK@fQD_N=y653B5!K=2U|(~gGI{xOAoEMh z;{@Y#kY9Z=BU*YYw~W2aKb=1)_0CpR#+KpXVb7plW>>e309H#=$8r+J8%P*CIS{`r z6ky|02@I?nDQt(UiasoCDha(G*s8TFpvgyEGSl=|IliF(e@Qq;v~NAbxSLkY+01hb zz0b6b2;lA!r~s|ue@3ggdz&XtgpxL}dw@-%i+IoZma=vW$ISw7+GVOd!tisy^?L39 z1wrwgGZ~IpzM|@mdAB6@XP6U)W^p8=$DuZih1xKW<@)36!o;?{Wi?ZxTZ_MCsd6Eo zO{2igb@k>y?9|aXH|v{BNZrT1=6ocah6pgWm-}HURmo0 zU+oc8m2YyEnv2e*CCm*ESUHVva-{sPS{&|9(3HIw5b}0X@|!oS{jVJm>)2xDZR}%u za}ZKA`gQEtKNmFe2p#+~$7Lv0@ySLh{W8BpDe2e0?fw`tiO=F~L5Rx+al=(h!|2O# z9%f%gE{aF&+_HrnUJ&3E1Xd#u$SG&Q1-+)0+idilEmr{*Y#Uh(<9Zga&~JD=CCuzg zO1eYf>p}?Bo~)JHwo%izGxyO2w+!x8tJ&$Qaub&4P2LNR&Ng2I{V(`fkdLU|B^6b< z@@5|X`!+=QH?`$C_j+N3`RRT`1=}f&PJQES$8)d3ORi=9gcEBP&l%f+daZFx zan?ZGZZ75=&v%q9FtX@ZYfYGn+qsGTQ+?r7WnW%!{@lka)B6vUj6CNew(4Deo;xmb z_a9#OmyH#BZ`wUl0sT|Wq1XP2ldZ*8O6Ub|7U32tJkgmF$biQSL!<*vou9iGKJEki(iqE?8i_qcD|y~)!Sw}Z3I-Jn;;0>V0G&OnwJS4JGq z6z)!_-kiM2*C>^>)!R6qHR+>m>+^{fu?8q^Zg5}3#Nr>w>Z`r8YSgx?ZSpq$GFnh# zwqB3NRNfJk){F7@|Bqn8eUK*N|1g4#ES2S1|444b{qD@^1Kjor$XI$!IrLcvr{~3Y zDFpGqp=YEL)q)WBHjli)j{hg^$zMOD!38w=b@&M3zq+?0WC7k{B6>v3&h)Cg{MSfp z(vX>z6XL6CH~$$g|BI=d1OCwuJo0UuqYY5P6r+S0`H2LFp#kT#p3mnAcip0-G;_v` z5Y%Fg?}I-NUlB#?2=f2X&!0c@L&FAyHvzbUrhYu z=JppWId2x1g*WqE>JSbJ0~N^#g~3xsTD1RN$)lhP%9J2qIHBrq-n^NrH;9rtfeJRl zp5r|rz})t0oZIr|q;!?^bLo~jPDxMv3Kq;cy`w~&Xc+cQ;qG5-@37X$_SAxswe;?~|E9+HMcQhhqI$49Ktcu8-tFgE zS%zh(t0Q3beHfK`A8a@#$+N-HuY;K~+GBSQ_pviD8>Oyvz0K!8P{;0qw{am1wVw`e z;e33v=XcuXnJWH2g&UI{uo)?WZH;z@hdT>EM}D`*V4n&Dw#s^6$qnZ|EHv#~KB+^l z;aD~c&s<2}MGP=9>Czm>bE2%6Eq!X&NB@L$i3`KX*6j?qywpO6;Ethi3i7k*>N5qL zVjazX_-?vseLXW~_;YFBDrSu5=M?&F_;D$#BF#1Ns-=R(^NW&N1i&`G=TEU`wTD9->@foJw)8th1bJn>|XjjVT z^|>iO)^B%Uh6(*^`pV;JisGSYgOeZI6P$4)K5tM1!&;(Fe0g(AesbB5Zddu#r}p*ww?8R_b%r{jl&blcEnHf!Z?z6;aBhwwqCR+Yac`T43KY-h zQ!4dMEq{t+q^y!y+rVpoJg{KkZtdCFhHOpGZ>Hx-*Bmieke(l3dHhiIbjY?*vd8<@ 
zJcggIiUu%-cY+h1Ub1M;NJJ%(@k$$@^S+QalgMbZ~!Vb?m zls`-8aA$q;#9C_^=qfkljy)4HCgiQq$rUY?Y5ENN`i-g&_6Id=nCe)V>KQ)E>f&&o zo?w@i=geHqX64N>WxB~HzA(doI#T517*0-Lv+1?Q@w2HXa?T(se(ngoga0rBI2MRxfv)TV+d$r|%hg$(rD{l^;H2mBx z``T$2?tdru@jgAZ;h<-kDmc9H+W_z}WQ-=Y4FONla8mCLrlS{kvnC!q7 z@fWcyeUbyv-xZp@|NN@(y->Mw-a4i4)w)F3g|7BI#TdQzCwR=S@O~1=m?mX=#oOJY}L}xU_iy% zQmZ?GKBL&h&*{fG3H7qtsrTq_c*;OO7D30-^?cH*aNxiD;!M0Tt|=B?REMF_ec2ac z2=NZfG;6&4_ZKtXym?bVy*yAfQp;fx0>lJY`UqZAoIBhX(+l3b$W_15*CW65=5s}7 zzy6Hh<7ECz4nY4hi>v4S+c#x?=BD+3`zyWqe`mTm|8RbB``PLJ@Sm`U?_Z?(yW8h3 zT+l*Deu9Zx5W1|w3?UyHp#UYHIP`s3S0B(iY2F=IZH8_rO*OU4K>gkcO?(Plugz$8 z&u61M`mKd2x({?kzr~I@Tiam|&?-ga{;oln8Oz>8ZUYb(lOhe!TfbP`ZXy(iX*UUwp`%_bp@CBL=N-I4rc`s`UhF#6jdB=6sXF zI%7Qtcc7ub#=;f)x@!ReF=&*&y-va@3uZ3s4!)jtQAmCtQAgx#Am85-FAE>>O81|J z@xHvD3Sh=6pCEmt;6?W09Aj*99i~A9EEioS*8oOV#U|4&r0j#IeHe<1R|+nu=qh;CQ|RUO-^<1E{Y|bjgyVSen>2ovoSP(}OBU z88o7wJq(67xKU0oaYhx? z7g0G{`XmCjY!OKB4&%DuEgp?s9flN8LRMBb{}E9^*?f;_wxv-2n|JT%r$S+jyxr8) z6#dGlKxX*gy&H|as!F0qNENCpW>08a<24;hWSf44rAxC97f0Ws*+yt}_79_B4PV)@ z_e!$7ZomVhDqKI2xf;gmAz&TQrfVdLso_eDD**wP-D=g?0gx3U)Y?pCd^0hdHy$>Hs5Xl2EwjlX%G za{#;ykUkR-6|}(~!rXa`3$W~cn`B*cGHMXr3|o=6PgZ#amx8a?9`OXKweeI7Vmd?2 z!W^FB% zo2tk2SE?Pv_`+8-(V`Z;5%v~}Zar}yQtU017&d6SNU@xUhrKWdu6qj|p; zi^Zx8O_xVn%9G>&h#i%lwYWlk|Bb4-k_#4WqWguk)1u%{wd;{u5ud+5vWU9}X@z`O zp{5Hy6RNcR5FGK-*$!RHqe8&|we^@a6AOd&!NxtFYh1;|!e9j!zW*gOc1R_#o}|Ey zL|0=17RB6a53zWgTdg?Wn0!wg%^AQQ_-?gUyLaM>{OwbYtg!}MqsttiNcKo zAMg(%DsUt@Sr%Xp%Mc0%Dg|Ipcg>wwZCSXH$I~7aru;yA7V?H$Wb}fQ*y-ike7TSt zMlRH?qz1|C#0pPvocq$zteq_@MHYOXk#zzhk9g8RA2Pl*AvhK$EWv0-C$b3H}G6YzNSM_!t7g zizY35_cTU3>N*u5=mEo7)Ms5Nwo`@KOb2_MsggW~3nI_dh3*B>KmkPC?e>Y6g^v%+ zm%_*vLmUE;#u@2+TQA&~$YBY}ImMBLhr~S?KEY|hgu;0nPKWT{VtKqiV!ec+PeJ>d zgti7G!xX~crT-OM#kpG^9DAj6gJ6Y2nI)XbHMej76HDqowIIWG!@kTB&LJmc=I={B z5hfA=WfZ<<#@@FcI?-E@tv~w#dSMx%P+rgOonx>e+|eNCuDyV;a4b5GEYLu}EVX(c zZ`vDDIV#9$=)0fiVRZ4FcC%J? 
z9UlBCsoikiyu1 zesNYKzycCyO4v^EDTE_VyB=_o+^klUTCLg+{c{s+U>G7~NI`N99L~h@UliP^1G~ur zC?IAE3I?=oKy@C8dXt_(y1@E#L80t!7`{hh0f~r9N~%#sPL>WmZUe84RhX^9o&&+; zdI6vP)fS(`PBIM25TKgi*H%olHZ@;i_O=#2I14KYO$E8pH%{*GuS-Eut%ZXD0q`1{ zsjo2@_Yv=opktg82$k5eHLt0UFjmvHPB4O}vwI!{@RVNnLduEDC|!Fyy+!LIng zJ4GNA?_c82=Go+eL6%)VGlmTbGr`G$eV6G765YQhjU#f8X8?Vuhaw;BfJV)VkWvs2U4 zyNN8mK{h*V&pyl+y94<+HF>2Cc9$_zg#s1a_mNknXpWRFk1;J3YeZvub1VimLMg!e zuXWw-l@EZx0@IBqPMoNLLxEZV-rZWejYw>SXwnwy22F^IgcNQwc8N9;2`%(ZQt#Tn z6uStR+Cgxzc*-2fO)&3qzLbj)OD8rG=wy!v<62(y_CKnwu9kDG^Ixg3>kwKS2*tv9 zUb5n%4JnlW#qCEPsPJ$E60-aFfb)oQT*wjjB2Ci2qF6p<#;w7f&U#AFGm@L$OB(rF zQHm+y$FLAfFCQc~$X=m){{9C~`}hbTCtFc_<$jW?izOUAFJ7D{Er7##?N&b_%dxz? z^B^VQEKd=(10K~Y*mfCI`3Y_;hT7r!o>%AZFKfNsQW&hn8v>V9GpU{Ynkq~SBvQYl zvJVXng*n9ATL-qUflpQARhtm&uz>hsm}*W@d4*^E#wA?r0oL*HjKN~~NlC({-2z>( z&z-O@FJisUrA&C0@6?`g{|fQ878oIT>i22WOiugy&V%nDvm0YlLOh3ud_+PZBeBXR z?^!({PJtche$Dyw+_^Or+3zA*x73FbAl@1o>!kU$=mEZn!+9PY=`ucGQ08Jex$?^g z$v73R+Pkk99Rh;(;mHwACC2I_1)+-s$25mj%7;{{PutUBj>Ll5jTe{)A)13_S&~i2 z?{nrH)(Tf!xW~}-w&o4l6Lo*uypv+jVO)TOQ~-T@QZ(mvv4q4Mj5zaIZCk%HHhq0H zLK=_10lZUfusr4gcC#N0EK2slAg8Lj52Gs4%FAeT%|%l2?(#t`Y$zw}yNh04Jna1G zjLE)PrWo4wJbUr@bJ@K={V`r=4#Me)C{vZK$o?*p5njFxZfyc$KFYMHcfI`=fwcAi z5akj50l}sK;{uL`Bo-d#`g}QSkFM4->HPl9S(0$tK8gzmCR+CAg$r+O4Ukt~xqZ7S zMAwJ0sQv5~h z_I?F;!Cii5X7P`H z=J)yYj~%OgT4Gs-iE+ZUI8Y?XxOwr474I-n$m=`W=jPxXUn4RXr-+7`J2!80c+Q{>mK)m*y!uC;uah9lxFBDmDs9lr|N*N=o9+DG7vuW4{5ry?SUECLxW(Lv6#IuNV9E&>8Xh zt^TKN9&Bh;AG~?{w$o`Ru&4Z2zKmtZ@`g|^QPxoDR6Av@kFUn-w$g6^&LOYc6zC4g zCsk?1mO*2YrL{at6Z2yrxnPMjVl-TC{0dpweJFxH&Ng-nrtk$NxQ%5l@+_ot4{^PX zP%bfth2Y}YW;LvDASi2h*>ujEoE&xZXHuR^{*rw#8ty{f6^;-$PG}g0ER9%Z50Sxj zVJSue^$``|jXv)(EZig*_WSJF50S$)9664%UxZ%%rEC?1c!aJ+FGBo4$qh^=0&4l2YhE%tQRw4OhLVd(~ded<&la1w^C6wYC*gJbm}@;y?K znNRs$)W3?=br4VK%+Tub#Wjtk4yevyAMRMT3S|ctb1@w6di+?s`0XnNe1cINk(fGN zKtOe2Ml-NW$F$CZ9mwR>fn11El|jyExF?ox^wj0fyeoks@=JYvB;rRp!BQC(`j{O% 
zh(>j*ek+(K6ueY?F+(BRs?F{ex_k+T@WJLm4%NaN8b}G7R~EW`49qx!|zAedo@*DPR78ORaB+&gRWd@m~t3<&jlX3C=B``Rh61uyRG`C~gUYBc;ak-xgS9aY7hCa)qwge=@g zzucTQX44RMS19I#%k7bdfx%_F)!SW$UJgR8pq3PG0^kfs*Gw# zY`J>pgJ!_y0L?aJsoxiwn34c$q7acHWDM_AF@VX)u|I-SKty%CwQe8(w$1EW-8%1C zmN+`eg9+uR8lw>K2H|t))sa3BF6MKRX@UL$Y{&-$0s}5mJ2P?SI+IKup<&AzXJWDm z%th7I3Cr!9lCu6=H?tiu5R4uX`4@ujjlxra*muG3uJJojErlTX9mVYiny@#pHSZ@g zui70w5|~~biA=x(=NI<7U0Id0qw#T+tdR!EkC97UyM3GfIn{C)iMKs!I2hdSBFppa z)Y&}KyQccjZ0K)!{dzmtu2y713iD>D>0lQRQK`(_m}aZYV zNRZ~hac4NZBAklm46Z!qbxr2D9MS1e*w!b1c!g^h{Ss4yl-n)8ak9sWQzClG`}WLKk$(>|5wT0Yk-$;BVkZzGI$#o!t zp`kCZ>_?`K7Tds8&jX8fKd}PVm~py{0kd%c)#MYRrhD#)wUioJfk0sz*F7&q_J?VZ z=aBP9(0375o$9c{bk97oH+L;j2GZ@rKZuKpDpQq9#NUjq=Rb?RVO>0XRvjM_VaV$F z1a*xwhGm(Vn+98tdwzuTEd9(8WZo{wJ30nWiF-XRdURkLr|sg$v5mlAhu|mu2CaT@ z2VsZ=prFGrSJczyf$jRRy+OoodnDXoIB}Ihl3L5^Zzb{DFveT$<++*KtfShAmZ8^v95qD(XJ{F1nKT_^HZ%)fF6A*=DK4&p&?aG={uGc%OYN0W%22`!Qhx%@ zgUKBm%kDmVj{xH~tp7gQ_K->e9=O>YAIcNr*`w-~B9}ryDL^J|S7w<=78=eT$94IA zOp`nk9eY2{yF0w1eVs2Re2^4e<#SFRkv&34R{s0p4T+-Bb8fc`nr8MG=#KQxWO`88 zM_ECoa3B0u3@8-4z#ZHU>#M8r<2EEm*F}`KZCkcxjRwgRikXJZrHWwc$v%xryGQpR zlvkOZ^?Tq`cmc`c^b739@p8C2(;K>eWSI9VJe(bI<{&ojB5WO15Ga&SpNGd(pWBi) zMszmzn&MDTmi@Mn9VD3e@Zp2%o;_?LUXYKl6M+3RANTCPBN73Rk92{RY7!`&cyyUC zoDatddYi_U3GzX{Zy=m9j{;xnlF>SAIOrfU;xFCdO@OUPdcg?YCgl9W|Iw&0jMfVw zW)V4oCaQN}>2hq|Sa}%NCjr5`-jL;TNLV)(PhswH9Gle4$C4e7OG(D5M z6cHN(a3T_1I)aDCe!lYUbuA162%aFu4-FkZKItj`k9R+i1Sn+Ym)sDjiA6XQz!qT6 zqX^fs$ixNKAEWn6?&g~~u>b&37p8~PwxV7QfQiv$1mijI-51aBTSA5=pe-p3u$(eq zMh!=_ZAH;fKvb7S;P0b?6|II3TLer55`5Sb(R3#*Y&S|darH&5cgZJ~te3RI)iwhX z!hyd%&;zmdjmy~CO@d;{f%2|RM5LhvutUutv2L9f_DV76g_!s6)zZMIHC1I~C=B%+ zK+lq63cU7d2~npKVIlabb-@1PjmN8r(Bm6AfLT*y?c3asz^9^tBEZ0sz+I}9Rc5yp z40;a#^rSkH`m1PJAPCWo-+fmm7$|F@7o%8L9lRx~f`0~xo6vaVd0sEce z%Lmbeluf)riPFg%)|P{_*~OkVmV-0cMfBTj_AE(Xkr0`Tgv$ppZ`Qdy0libT$eLH9 zb>|{Tj=4zMBngf|+%2db0+uC-n_vmF;aNCZDL(*mI)LZ;3hx4N1AIou{0`g<0R+&s zAV7aya0dqS3hDYqEaJH{m+7n{js>U8JfhXET)u2lhhH0vk=7VCDBGTj6UuXKUkukak|=mDH6>IR=J!?1IgunRgm21 
zy|l;Pl;_b?0@Oo2vB-3Gk_-V1XOB|Cy=soGh~z~}nNeJo(Tcx*lxNv*Yg~EaeBexd z6r+C`p*S0w(9G76R7H(|_*37B!(yWUCQtfY%8@^CUMFt)=zEt%0Pm*^WpwzJ24dpF zB}MbH$=r2_<)~<5II=3aT@Cb8dS`L}vRXF(>qC=yR$VxwR!y1K{%A+TY3Mjn%VVJz0puYT8bklg`F z+cD1T8LS+!k3<*ttEKnc?jvV(Eng1|P1tX|BpVMo|oPnMwA91EO__@z72QO}c^0PlS< z->jPN-_t?bYB@P2Aw~W2T@M)=w0jk{`SPS80BOPh`|rVMI&Cr@?z@4pG99gsXs#xE z_(^;bI4Ouunu*ViA4AxW z!XQM{p$Bq?)V*@ofpccqA(SU}f6ntp8#V3;VnANZAmSY|>C33EQ3+Kiv^L!HypBH@ zI#zrcoIO6{W$9G@kw@yaOcP`Y=Y#U-X(G_k`A$_DhCgZI6N(%`7$f%@(hVRMJ*$0$ zZAVRbbjG3rjR1%!3&jOJZ-5*YplDRLZ+~rDPI-m%kAtYN!|}`_(G^Oj=mhRynKYFp z#+D7?Afft-+TL)GlY=NVq>CH@K1neZ!`ce*St;$oB>LNmp6SplhT~kI@3yOL^f!hQv_VP~n2GfhMs$Xjmg#Gwm#y7PZM z-6bOmoy=k++o53Jk5)c)?(4WWyj-QP?&=G+5<478I_8bwRK$7Ab4RqcQD5) z7?N3>2D_P-B+|rPCmAi77b1)?EvZ8stM^BzEygXKIeq#tavK~H*P|*VC$kTnhkh%D zL@ChMa8wY)M3B4yNtqVtDaK$ePl^+Jiwc$M6{Y7l@>2N$EiUZm#1}7Cg3yF9G1IEDI&y29TS zyfkh7arjr+yEA`34D4#|%L4~mSV*eEV_ zSGLg~2>~USW^$1vSsZdkEK=7i^^2^#q3lI@gk~NAaBXe;DC&qfUVq6iK zhxS3xLv%aw#SqYmx8Fo9kB_xCrT(@>CsN#I2+7m{j$>2>;CB&2w_`_+K!bJtYtrYj zemoCx1cUSbO1B1qu?ty@CSx7OP5|7kvaDjf^~Oz6#R;ebG8Xh>`AJ1GK7pYK4082^ zK7fF8vT1=VjObXBUlWv`&MV{mBa}8CX*MC4A=~LL%qW`K(`Hlub}pfjkuc^2&|ZX8 zkxbqoFX>7e2I0}!p2fFp<;vaosdJYu>5#S_A`iL#Z)J&laPGm0Ro1rt0_ouqO2uVl z^lU8Nh&hXhIcrpxH=9k|o?oP=>We}LRW)Pyq${2K@*dmmXiZ~)kw4YDHIhGo#R&@*I%}3SZwQ=`vA| za5xN{x1D~T16h{(HVs~yNae4=G!5Mc)L@O{QWNl=h~Z>jLe0pTg?=2g5HZSTolVtk z*r@rmS-k}5q)Dm0w2_p$l&3=3Bi?zF^kTRxN3o>66_wg>BY$DM`c}!x1SOH$5nHnv z*lkYyFbDy{=HA_0O~sWq!Z`#1^6o%QaeRQ(+ov7MCR;Zq>~*ok*+9<;=!-z5OVT1h zc3z|XNHju{0W!m(bQxo{LP3Udyb}Nco^tEQY|HahZIb(HVx>zI)!InnNL%CFT?NSw z2&E7@R=(uk(8W3+(aXDgipEqRqA03;bPu|TC{)wP9@J3h76Q_RF@*|)HZ#hu?TQ2K zryz`Ilx;w&A!YOVgX*tc>f8d1X;aSKu_kJK;}1e6hp9w zAhDx=y7f40ZaMtN!{flXd^re^UL>^7`@n@UGlu#W2?F?%{Zd!QpE3-N&JO&$1(r}B zm{KC2*k~2qd9ym94L`Q-@cRV>kcFbd_5OC3z^MKO&>-qPtt06d+w?Qz<3Hqb5t$ zjBY66J@H9!x;_J!2dg)82)hBNODvnz-y#__KTX&mQe-KZxrLk~K;jan08ghg->RSh zQ7M2m%JD>Up1iKT=;z0lVjEDYOSF$&YJ52) zHiTBe*~s;&(ipY**nT>|MafQseZVV>@op53^?QixXdO_<0>)92oF^>`Hb|# 
zowx3TrA))2Odb+t&U9?aLvl)x)vQ%U248#bD%uk2gb2s%h6thw7r(sy6Y_|oFv`UB z*s#bA|ZiN$6cYX}1wmDHCUuk5=+p6_-$} z`n(i%W;A44fge%+(6I~Y!ag#VFaC}LwOti`}V+RFsobQH#-Cw^RhB44l z+yojAg-v<2tvS`LoM8uYXi+O4kJub^A$YY?+ydH1gCD?}p0j(4nsNcEb=PmnOY;-{ zDm!2oj^j_t+3`l`1-P38H{SA zfJhQkOGyz_gs=RBNxzD}VYg~J-Pri=Nk|90%s!Ih8ig7?3`$-gG&l51X|Kbx|DQ|e zK=+@mWlM4WAT4Qn2u>!ViE)pj0by@X>Crp;50j6^z5_rqHre3$svt{LM~CARpjX6# zkp~Z;Hq0Drck^=vOG47}aAYuj2(eO-yyJA-fk*lt7~y#-A2PEcj8h2`ZxlTwRmXCn z+8FqL6ijRVyUj*3LUA%TK^>~Ib*m37eU?Ym*>38qs;EG&LZbR)OWX0@pfl|ewOxi^ z@{BXFM28a?b?q;^-pch zQD8x?{===St;(3;&tPbn+{@c!j!W)n?f(0ZYJR7ED z)Vo6zGaPgM@JUc08aIM6TlM#>$D8DnZ62suKH^@+B6X@bk-LKOUjSQ0aMve+AEEta zLz8ALe^2uO5i5{%sp12Cq?{C@4GTzlXj~tPl6ky(R5LzbXk@^U6QXlF}0wIm5;yW-@GJrB#q-^^V`l-ei7-D%hbGK}kzVX|XDeoNhX@8)Ng7yc=OA&uMk%2U$rquzqdkx;;4Ra1zCkF8> z=L!1C$>-7hwV4J@AQ|foh?PpP4E0X#TP_?>j9cQ~csOTx&AxhR{Lf_x(l)E%9-*zgb=%Ci%I z-5ds=DP(MclAki7ZP3?>jQGu9+n6kb)Vl!s8=81P;O(eUqcEgN<5Ax4f`Tg=KYwg} zD!P8X4!p;Up`(>k&`1lv>&ctj*5-i9rmIhI#nr1<07SP3svEMtJ=1~EPvp*v*8i4^soY_cs@3y#f zI3%OS01x3GiUXqg%xoiYZ~?MKMd|-7n^72K|64Yb*U^}UZ6OBOq^p_jILTF212`A5 z$gkfG@+{JwJIwu1s*U_I2_)bP$eAYP6~I36&{OoX6G1OCsHHsiVk5dLu8E-R$Q&}RP zk&!iX8=w%X4|XsxMy5dysdLpz6N93H_V!i=4bt#yIFY`$6m^=n`!~qbfJt^Y@Q`;;$JZ9ILhji0V@-%Z-|#5V~gf{Ze>dwth5nAO=Z4P^~m< zc-3p3ms}M_-K3a2y>gE$20EfOk%fDCaqil!uH7v;Kf4SStf)2L=}I>jb;KR^I*wYr zneHw^JnFwcr~^$|p&MqbS|%nzm=yk%^&Ns)RTP+PFwJ2=slJp1;%KKd6CD%pZE1WM z(^ghkf2HAoYt@cbX?3jC#+bGqc%pya7Xq`~rwA*Q7YQLj`(-bOo+7a@^ph^>eEDpi1)h*$@|Y~6?t9!A zZBSY(jMEJe_?CBXxO+Say~GpCS7E%wfz*y9gdAOcbz#@b2)NY_hZcPNcxU0EDn!^` zh0*tA4mhA$qr+(@WGL8ig<2>8l>bA-{R;Ku2Z;Wd2(yM{U5gr-GAxgJ-X z%B5gv;Z_19wkLQ2U<{|12x&GfF8N&yfH_gUL{J+b``#hJXcHi_T z{rs`*#a}l+bKhNQ+p{PuA^3|XeNCvYbSl3Cfvw3(Xnt!v*L}E;N?|%p`=LUTqFCfrO1KDypK0iPnKgevjV>l{@bLSFEcR2HD>UZ*Srtz$RQN$;zn(0(wIJ zxA#ODGQDa!Ozd^h`4~tIh5sYUxj=5vkgxW6RR1UN3kt@xrT6a!gIGkmXDrfLy`Qi# z)zLOUYFqZJwa^6+o=xp1_~9FeTya|gVcEA=KiiVu1yy=dUZdNwWV`Ro+r(?IU%BBp zcTpwDyL$$N2fD`4D><^Vv$HwsXuxK4(JAh4j^j?z0BDpMoT*Je1JF#1Viq}gUW`1d 
zdMKN3h!7W^Z-buH%OaRAlKk5JuN#nQM2j4IYpb%$*r%}-D7=Z=LmvJBU+Pl#F4biS z?U6V`m`lqH&)#*E4?t>)4mY0R=fpiEX#d;Ms~0Lg8B*1zPme-~$wILtv3`9*;Q<_n z-Z-MdkbcqVYO0R`hDSnqfgEx>YDW!-Apv7Q+|uug|FfLF?IG65EPN*j==pu}dgz#)OaBQZ^)f)YyarFZ5jS>g7&cy`5qD|Fw4RAZAMZBA;9&N2Xz1=BgVZC}4U-Fp=i%Ak4%*xC z6KR@wA9&5evbU=Xwo`(eL9GE;fAyJNC*DqcsT7zym!_tbW3Z3wJ0Cu*#dd^>u07$c zpmj3bX5{@;eM;-Cbqm4B@9yayM8gKy&;FY9YwgY>H}#Zwu6#6XeWdXT7~RiYBz>R& zCW?|3k&qxZqMAbJlGQIuUD~EE)3$t;vuVwIO?}F5HjJvXnjh`uk#Y?*e`#uT%odBy qoo$t;_OX?;>1gJnr!^S4*Qwj0=berZ8_9?vpRqT!HqJM6x&8xMX+>NB literal 0 HcmV?d00001 From 282b376d21fe76d504500c14e09b6acc863ec4a7 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 4 Feb 2026 13:51:25 -0500 Subject: [PATCH 079/158] Simplify --- cpp/cmake/modules/generate_jit_lto_kernels.cmake | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index dcf985b391..64f2321419 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -187,11 +187,7 @@ function(generate_jit_lto_kernels target) endforeach() foreach(filter_name IN LISTS filter_configs) - if(filter_name STREQUAL "filter_none") - set(header_file "neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh") - elseif(filter_name STREQUAL "filter_bitset") - set(header_file "neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh") - endif() + set(header_file "neighbors/ivf_flat/jit_lto_kernels/${filter_name}.cuh") set(kernel_name "${filter_name}") set(filename "${generated_kernels_dir}/filter_device_functions/fatbin_${kernel_name}.cu") @@ -209,13 +205,7 @@ function(generate_jit_lto_kernels target) endforeach() foreach(post_lambda_name IN LISTS post_lambda_configs) - if(post_lambda_name STREQUAL "post_identity") - set(header_file "neighbors/ivf_flat/jit_lto_kernels/post_identity.cuh") - elseif(post_lambda_name STREQUAL "post_sqrt") - set(header_file 
"neighbors/ivf_flat/jit_lto_kernels/post_sqrt.cuh") - elseif(post_lambda_name STREQUAL "post_compose") - set(header_file "neighbors/ivf_flat/jit_lto_kernels/post_compose.cuh") - endif() + set(header_file "neighbors/ivf_flat/jit_lto_kernels/${post_lambda_name}.cuh") set(kernel_name "${post_lambda_name}") set(filename "${generated_kernels_dir}/post_lambda_device_functions/${post_lambda_name}.cu") From 3115d075dfdc959016d87a4cb9da116d6a3f605a Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 4 Feb 2026 21:14:03 +0000 Subject: [PATCH 080/158] address reviews --- ...orithmLauncher.h => AlgorithmLauncher.hpp} | 0 ...lgorithmPlanner.h => AlgorithmPlanner.hpp} | 2 +- ...ragmentDatabase.h => FragmentDatabase.hpp} | 4 +- .../{FragmentEntry.h => FragmentEntry.hpp} | 0 ...{MakeFragmentKey.h => MakeFragmentKey.hpp} | 0 ...lFragment.h => RegisterKernelFragment.hpp} | 2 +- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 2 +- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 8 +++- cpp/src/detail/jit_lto/FragmentDatabase.cu | 4 +- cpp/src/detail/jit_lto/FragmentEntry.cu | 2 +- .../ivf_flat_interleaved_scan_kernel.cuh | 46 ------------------- .../ivf_flat/jit_lto_kernels/filter.cu.in | 2 +- .../interleaved_scan_kernel.cu.in | 2 +- .../interleaved_scan_planner.hpp | 6 +-- .../ivf_flat/jit_lto_kernels/metric.cu.in | 2 +- .../jit_lto_kernels/post_lambda.cu.in | 2 +- 16 files changed, 21 insertions(+), 63 deletions(-) rename cpp/include/cuvs/detail/jit_lto/{AlgorithmLauncher.h => AlgorithmLauncher.hpp} (100%) rename cpp/include/cuvs/detail/jit_lto/{AlgorithmPlanner.h => AlgorithmPlanner.hpp} (95%) rename cpp/include/cuvs/detail/jit_lto/{FragmentDatabase.h => FragmentDatabase.hpp} (95%) rename cpp/include/cuvs/detail/jit_lto/{FragmentEntry.h => FragmentEntry.hpp} (100%) rename cpp/include/cuvs/detail/jit_lto/{MakeFragmentKey.h => MakeFragmentKey.hpp} (100%) rename cpp/include/cuvs/detail/jit_lto/{RegisterKernelFragment.h => RegisterKernelFragment.hpp} (94%) diff --git 
a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp similarity index 100% rename from cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.h rename to cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.hpp similarity index 95% rename from cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h rename to cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.hpp index 8126fa8866..93f24d0c6c 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.h +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmPlanner.hpp @@ -8,7 +8,7 @@ #include #include -#include "AlgorithmLauncher.h" +#include "AlgorithmLauncher.hpp" struct FragmentEntry; diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.hpp similarity index 95% rename from cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h rename to cpp/include/cuvs/detail/jit_lto/FragmentDatabase.hpp index 890796ba44..aeb170d861 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.h +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.hpp @@ -9,8 +9,8 @@ #include #include -#include "FragmentEntry.h" -#include "MakeFragmentKey.h" +#include "FragmentEntry.hpp" +#include "MakeFragmentKey.hpp" class FragmentDatabase { public: diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentEntry.h b/cpp/include/cuvs/detail/jit_lto/FragmentEntry.hpp similarity index 100% rename from cpp/include/cuvs/detail/jit_lto/FragmentEntry.h rename to cpp/include/cuvs/detail/jit_lto/FragmentEntry.hpp diff --git a/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h b/cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.hpp similarity index 100% rename from cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.h rename to cpp/include/cuvs/detail/jit_lto/MakeFragmentKey.hpp diff --git a/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h 
b/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.hpp similarity index 94% rename from cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h rename to cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.hpp index 1910373dcb..5643be6523 100644 --- a/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.h +++ b/cpp/include/cuvs/detail/jit_lto/RegisterKernelFragment.hpp @@ -5,7 +5,7 @@ #pragma once -#include "MakeFragmentKey.h" +#include "MakeFragmentKey.hpp" void registerFatbinFragment(std::string const& algo, std::string const& params, diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 0a21c75d63..d095689f5d 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include +#include #include diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 693e2ee685..2cc7376710 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -8,12 +8,13 @@ #include #include #include +#include #include #include #include -#include -#include +#include +#include #include "cuda_runtime.h" #include "nvJitLink.h" @@ -47,6 +48,9 @@ std::shared_ptr AlgorithmPlanner::get_launcher() { auto& launchers = get_cached_launchers(); auto launch_key = this->entrypoint + this->get_device_functions_key(); + + static std::mutex cache_mutex; + std::lock_guard lock(cache_mutex); if (launchers.count(launch_key) == 0) { add_entrypoint(); add_device_functions(); diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index ea43b987fb..02ea688a0d 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -3,8 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include -#include +#include +#include #include diff --git 
a/cpp/src/detail/jit_lto/FragmentEntry.cu b/cpp/src/detail/jit_lto/FragmentEntry.cu index 7f0f83ebb9..af1fb90e58 100644 --- a/cpp/src/detail/jit_lto/FragmentEntry.cu +++ b/cpp/src/detail/jit_lto/FragmentEntry.cu @@ -7,7 +7,7 @@ #include -#include +#include FragmentEntry::FragmentEntry(std::string const& key) : compute_key(key) {} diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh index d6d65d8707..907638b3ff 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh @@ -37,52 +37,6 @@ extern __device__ bool sample_filter(index_t* const* const inds_ptrs, template extern __device__ T post_process(T val); -/** - * @brief Copy `n` elements per block from one place to another. - * - * @param[out] out target pointer (unique per block) - * @param[in] in source pointer - * @param n number of elements to copy - */ -template -__device__ inline void copy_vectorized(T* out, const T* in, uint32_t n) -{ - constexpr int VecElems = VecBytes / sizeof(T); // NOLINT - using align_bytes = raft::Pow2<(size_t)VecBytes>; - if constexpr (VecElems > 1) { - using align_elems = raft::Pow2; - if (!align_bytes::areSameAlignOffsets(out, in)) { - return copy_vectorized<(VecBytes >> 1), T>(out, in, n); - } - { // process unaligned head - uint32_t head = align_bytes::roundUp(in) - in; - if (head > 0) { - copy_vectorized(out, in, head); - n -= head; - in += head; - out += head; - } - } - { // process main part vectorized - using vec_t = typename raft::IOType::Type; - copy_vectorized( - reinterpret_cast(out), reinterpret_cast(in), align_elems::div(n)); - } - { // process unaligned tail - uint32_t tail = align_elems::mod(n); - if (tail > 0) { - n -= tail; - copy_vectorized(out + n, in + n, tail); - } - } - } - if constexpr (VecElems <= 1) { - for (int i = threadIdx.x; i < n; i += blockDim.x) { - out[i] = in[i]; - } - } -} - 
/** * @brief Load a part of a vector from the index and from query, compute the (part of the) distance * between them, and aggregate it using the provided Lambda; one structure per thread, per query, diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in index 4a6be97fa0..934e36dba7 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in @@ -18,7 +18,7 @@ template __device__ bool sample_filter(int64_t* const* const, const uin #else -#include +#include #include "@filter_name@.h" __attribute__((__constructor__)) static void register_@filter_name@() diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in index b8ce64c5ea..8c972221bc 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in @@ -21,7 +21,7 @@ template __global__ void interleaved_scan_kernel<@capacity@, @veclen@, @ascendin #else -#include +#include #include #include "interleaved_scan_kernel_@capacity@_@veclen@_@ascending@_@compute_norm@_@type_abbrev@_@acc_abbrev@_@idx_abbrev@.h" diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp index 1a8217524a..792c64f39a 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -5,9 +5,9 @@ #pragma once -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in index 11fb87b361..0f6bb904d1 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in +++ 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in @@ -18,7 +18,7 @@ template __device__ void compute_dist<@veclen@, @data_type@, @acc_type@>(@acc_ty #else -#include +#include #include #include "metric_@metric_name@_@veclen@_@type_abbrev@_@acc_abbrev@.h" diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in index 7cb702122c..abf156a133 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda.cu.in @@ -18,7 +18,7 @@ template __device__ float post_process(float); #else -#include +#include #include "@post_lambda_name@.h" __attribute__((__constructor__)) static void register_@post_lambda_name@() From 4bd2102ae33636d68acdd36fe1f0dfb4b5c4bce4 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 10 Feb 2026 20:20:11 +0000 Subject: [PATCH 081/158] add to docs and log about jit --- cpp/include/cuvs/neighbors/ivf_flat.hpp | 12 ++++++++++++ cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 1 + 2 files changed, 13 insertions(+) diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index b28c01de04..23c6dd4944 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -1401,6 +1401,9 @@ void extend(raft::resources const& handle, /** * @brief Search ANN using the constructed index. + * This function JIT compiles the kernel for the very first usage, after which it maintains an + * in-memory and disk-based cache of the compiled kernels. We recommend running a warmup search + * before the actual searches to avoid the first-time JIT compilation overhead. * * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. * @@ -1442,6 +1445,9 @@ void search(raft::resources const& handle, /** * @brief Search ANN using the constructed index. 
+ * This function JIT compiles the kernel for the very first usage, after which it maintains an + * in-memory and disk-based cache of the compiled kernels. We recommend running a warmup search + * before the actual searches to avoid the first-time JIT compilation overhead. * * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. * @@ -1482,6 +1488,9 @@ void search(raft::resources const& handle, cuvs::neighbors::filtering::none_sample_filter{}); /** * @brief Search ANN using the constructed index. + * This function JIT compiles the kernel for the very first usage, after which it maintains an + * in-memory and disk-based cache of the compiled kernels. We recommend running a warmup search + * before the actual searches to avoid the first-time JIT compilation overhead. * * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. * @@ -1523,6 +1532,9 @@ void search(raft::resources const& handle, /** * @brief Search ANN using the constructed index. + * This function JIT compiles the kernel for the very first usage, after which it maintains an + * in-memory and disk-based cache of the compiled kernels. We recommend running a warmup search + * before the actual searches to avoid the first-time JIT compilation overhead. * * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. 
* diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 2cc7376710..b2487282dc 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -54,6 +54,7 @@ std::shared_ptr AlgorithmPlanner::get_launcher() if (launchers.count(launch_key) == 0) { add_entrypoint(); add_device_functions(); + RAFT_LOG_INFO("JIT compiling launcher for key: %s", launch_key.c_str()); launchers[launch_key] = this->build(); } return launchers[launch_key]; From ba758a227bcdb771fbd59326047b75b7455aecbe Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 10 Feb 2026 20:23:11 +0000 Subject: [PATCH 082/158] address review --- cpp/cmake/modules/generate_jit_lto_kernels.cmake | 6 +----- .../{metric_euclidean_dist.cuh => metric_euclidean.cuh} | 0 2 files changed, 1 insertion(+), 5 deletions(-) rename cpp/src/neighbors/ivf_flat/jit_lto_kernels/{metric_euclidean_dist.cuh => metric_euclidean.cuh} (100%) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 64f2321419..671b08321c 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -163,11 +163,7 @@ function(generate_jit_lto_kernels target) endforeach() foreach(metric_name IN LISTS metric_configs) - if(metric_name STREQUAL "euclidean") - set(header_file "neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh") - elseif(metric_name STREQUAL "inner_prod") - set(header_file "neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh") - endif() + set(header_file "neighbors/ivf_flat/jit_lto_kernels/metric_${metric_name}.cuh") set(kernel_name "metric_${metric_name}_${veclen}_${type_abbrev}_${acc_abbrev}") set(filename "${generated_kernels_dir}/metric_device_functions/fatbin_${kernel_name}.cu") diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean.cuh similarity index 100% rename from cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean_dist.cuh rename to cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_euclidean.cuh From 42b78ae38b858c54ce10e37dd47e3ca29008606b Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 10 Feb 2026 23:30:38 +0000 Subject: [PATCH 083/158] rename inner_product to inner_prod --- .../{metric_inner_product.cuh => metric_inner_prod.cuh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cpp/src/neighbors/ivf_flat/jit_lto_kernels/{metric_inner_product.cuh => metric_inner_prod.cuh} (100%) diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_prod.cuh similarity index 100% rename from cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_product.cuh rename to cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_inner_prod.cuh From f6377fa4b1a92910803e029acce469451c35c400 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 10 Feb 2026 23:54:47 +0000 Subject: [PATCH 084/158] include header and form better log --- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index b2487282dc..93d1f205e7 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -19,6 +19,7 @@ #include "cuda_runtime.h" #include "nvJitLink.h" +#include #include void AlgorithmPlanner::add_entrypoint() @@ -54,7 +55,13 @@ std::shared_ptr AlgorithmPlanner::get_launcher() if (launchers.count(launch_key) == 0) { add_entrypoint(); add_device_functions(); - RAFT_LOG_INFO("JIT compiling launcher for key: %s", launch_key.c_str()); + std::string log_message = + "JIT compiling launcher for entrypoint: " + this->entrypoint + " and device functions: "; + for (const auto& 
device_function : this->device_functions) { + log_message += device_function + ","; + } + log_message.pop_back(); + RAFT_LOG_INFO("%s", log_message.c_str()); launchers[launch_key] = this->build(); } return launchers[launch_key]; From fb7f1059ba1dce106f303a950e37908ef1de54bf Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 10 Feb 2026 23:56:58 +0000 Subject: [PATCH 085/158] merge --- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index e30e87c31e..f679f9f0bd 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -13,18 +13,8 @@ #include #include -<<<<<<< HEAD -#include -#include -#include -#include -#include - == == == - = #include #include - >>>>>>> jit - lto - ivf - flat - - interleaved #include "cuda_runtime.h" #include "nvJitLink.h" @@ -32,8 +22,7 @@ #include #include - void - AlgorithmPlanner::add_entrypoint() +void AlgorithmPlanner::add_entrypoint() { auto entrypoint_fragment = fragment_database().get_fragment(this->entrypoint); this->fragments.push_back(entrypoint_fragment); From 432bb32d48357201068e29f03384b79d94441477 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 11 Feb 2026 05:57:09 +0000 Subject: [PATCH 086/158] working through --- .../modules/generate_jit_lto_kernels.cmake | 141 +++ .../jit_lto/cagra/search_single_cta_tags.hpp | 48 + cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 8 - .../compute_distance_standard.cu.in | 38 + .../compute_distance_standard_impl.cuh | 53 ++ .../compute_distance_vpq.cu.in | 38 + .../compute_distance_vpq_impl.cuh | 67 ++ .../search_single_cta_planner.hpp | 78 ++ .../setup_workspace_standard.cu.in | 38 + .../setup_workspace_standard_impl.cuh | 55 ++ .../jit_lto_kernels/setup_workspace_vpq.cu.in | 38 + .../setup_workspace_vpq_impl.cuh | 67 ++ ...search_single_cta_kernel_explicit_inst.cuh | 12 + 
.../search_single_cta_kernel_jit-inl.cuh | 863 ++++++++++++++++++ .../cagra/search_single_cta_kernel_jit.cuh | 106 +++ 15 files changed, 1642 insertions(+), 8 deletions(-) create mode 100644 cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_kernel_explicit_inst.cuh create mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit-inl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit.cuh diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 671b08321c..53095fc425 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -217,4 +217,145 @@ function(generate_jit_lto_kernels target) EMBEDDED_ARRAY "embedded_${kernel_name}" ) endforeach() + + # Generate CAGRA device function fragments + set(cagra_data_types "float" "__half" "uint8_t" "int8_t") + set(cagra_data_type_abbrevs "f" "h" "uc" "sc") + set(cagra_index_type "uint32_t") + set(cagra_index_abbrev "ui") + 
set(cagra_distance_type "float") + set(cagra_distance_abbrev "f") + set(cagra_metrics "L2Expanded" "InnerProduct" "CosineExpanded") + set(cagra_metric_abbrevs "l2" "ip" "cos") + set(cagra_team_sizes 8 16 32) + set(cagra_dataset_block_dims 128 256 512) + set(cagra_pq_bits 8) + set(cagra_pq_lens 2 4) + set(cagra_codebook_type "half") + + # Generate standard descriptor fragments + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(metric_idx IN ITEMS 0 1 2) + list(GET cagra_metrics ${metric_idx} metric) + list(GET cagra_metric_abbrevs ${metric_idx} metric_name) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # setup_workspace_standard + set(kernel_name + "setup_workspace_standard_${metric_name}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(metric_cpp "cuvs::distance::DistanceType::${metric}") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # compute_distance_standard + set(kernel_name + "compute_distance_standard_${metric_name}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename 
"${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(metric_cpp "cuvs::distance::DistanceType::${metric}") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + endforeach() + + # Generate VPQ descriptor fragments (only for L2Expanded and float/half) + foreach(data_idx IN ITEMS 0 1) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + foreach(pq_len IN LISTS cagra_pq_lens) + # setup_workspace_vpq + set(kernel_name + "setup_workspace_vpq_l2_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") + set(metric_name "l2") + set(pq_bits "${cagra_pq_bits}") + set(codebook_type "${cagra_codebook_type}") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in" + "${filename}" + @ONLY + ) + 
embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # compute_distance_vpq + set(kernel_name + "compute_distance_vpq_l2_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") + set(metric_name "l2") + set(pq_bits "${cagra_pq_bits}") + set(codebook_type "${cagra_codebook_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + endforeach() endfunction() diff --git a/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp b/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp new file mode 100644 index 0000000000..b6ed4c786c --- /dev/null +++ b/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp @@ -0,0 +1,48 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +namespace cuvs::neighbors::cagra::detail { + +// Tag types for data types +struct tag_f {}; // float +struct tag_h {}; // __half +struct tag_sc {}; // int8_t +struct tag_uc {}; // uint8_t + +// Tag types for index types +struct tag_idx_ui {}; // uint32_t + +// Tag types for distance types +struct tag_dist_f {}; // float + +// Tag types for distance metrics +struct tag_metric_l2 {}; +struct tag_metric_inner_product {}; +struct tag_metric_cosine {}; +struct tag_metric_hamming {}; + +// Tag types for team sizes +struct tag_team_8 {}; +struct tag_team_16 {}; +struct tag_team_32 {}; + +// Tag types for dataset block dimensions +struct tag_dim_128 {}; +struct tag_dim_256 {}; +struct tag_dim_512 {}; + +// Tag types for sample filter types +struct tag_filter_none {}; +struct tag_filter_bitset {}; + +// Tag types for VPQ parameters +struct tag_pq_bits_8 {}; +struct tag_pq_len_2 {}; +struct tag_pq_len_4 {}; +struct tag_codebook_half {}; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index f679f9f0bd..c5821459ae 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -106,14 +106,6 @@ std::shared_ptr AlgorithmPlanner::build() result = nvJitLinkGetLinkedCubin(handle, cubin.get()); check_nvjitlink_result(handle, result); - // Dump CUBIN for analysis with cuobjdump - if (dump_cubin) { - std::string filename = "/tmp/jit_kernel_" + std::to_string(dump_counter++) + ".cubin"; - std::ofstream out(filename, std::ios::binary); - out.write(cubin.get(), cubin_size); - std::cerr << "Dumped CUBIN to: " << filename << " (" << cubin_size << " bytes)" << std::endl; - } - result = nvJitLinkDestroy(&handle); check_nvjitlink_result(handle, result); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in new file mode 100644 index 0000000000..833ad70df5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Instantiate the compute_distance_standard function for standard descriptor +template __device__ @distance_type@ compute_distance_standard<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( + const @data_type@*, uint32_t, @index_type@, uint32_t, uint32_t, uint32_t, const @distance_type@*); + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search + +#else + +#include +#include +#include "compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@", + embedded_compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh new file mode 100644 index 0000000000..62d8796526 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh @@ -0,0 +1,53 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_standard-impl.cuh" +#include "../device_common.hpp" + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Extern function implementation for compute_distance_standard (standard descriptor) +template +__device__ DistanceT compute_distance_standard(const DataT* dataset_ptr, + uint32_t smem_ws_ptr, + IndexT dataset_index, + uint32_t dim, + uint32_t ld, + uint32_t team_size_bitshift, + const DistanceT* dataset_norms) +{ + using desc_type = cuvs::neighbors::cagra::detail:: + standard_dataset_descriptor_t; + using base_type = typename desc_type::base_type; + using args_t = typename base_type::args_t; + + // Reconstruct args_t from parameters + args_t args; + args.smem_ws_ptr = smem_ws_ptr; + args.dim = dim; + args.extra_word1 = ld; // dataset_ld + args.extra_ptr1 = (void*)dataset_ptr; // dataset_ptr + args.extra_ptr2 = (void*)dataset_norms; // dataset_norms + + // Call the free function compute_distance_standard + auto per_thread_distances = + cuvs::neighbors::cagra::detail::compute_distance_standard(args, dataset_index); + + // Use team_sum with the provided team_size_bitshift + return device::team_sum(per_thread_distances, team_size_bitshift); +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in new file mode 100644 index 0000000000..811dd16ae5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Instantiate the compute_distance_vpq function for VPQ descriptor +template __device__ @distance_type@ compute_distance_vpq<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( + const uint8_t*, uint32_t, @index_type@, uint32_t, const @codebook_type@*, const @codebook_type@*, uint32_t); + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search + +#else + +#include +#include +#include "compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh new file mode 100644 index 0000000000..493d2b884e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh @@ -0,0 +1,67 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_vpq-impl.cuh" +#include "../device_common.hpp" + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Extern function implementation for compute_distance_vpq (VPQ descriptor) +template +__device__ DistanceT compute_distance_vpq(const uint8_t* encoded_dataset_ptr, + uint32_t smem_ws_ptr, + IndexT dataset_index, + uint32_t encoded_dataset_dim, + const CodebookT* vq_code_book_ptr, + const CodebookT* pq_code_book_ptr, + uint32_t team_size_bitshift) +{ + using desc_type = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; + using base_type = typename desc_type::base_type; + using args_t = typename base_type::args_t; + + // Reconstruct args_t from parameters + args_t args; + args.smem_ws_ptr = smem_ws_ptr; + args.dim = encoded_dataset_dim; + args.extra_word1 = encoded_dataset_dim; + args.extra_ptr1 = (void*)encoded_dataset_ptr; + args.extra_ptr2 = (void*)vq_code_book_ptr; + // Note: pq_code_book_ptr is stored in shared memory (copied during setup_workspace_vpq), + // and compute_distance_vpq accesses it via args.smem_ws_ptr, so we don't need to pass it + // separately. 
+ + // Call the free function compute_distance_vpq + // It will access the codebook from shared memory via smem_ws_ptr + auto per_thread_distances = + cuvs::neighbors::cagra::detail::compute_distance_vpq(args, dataset_index); + + // Use team_sum with the provided team_size_bitshift + return device::team_sum(per_thread_distances, team_size_bitshift); +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp new file mode 100644 index 0000000000..4f0492fea9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -0,0 +1,78 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +template +struct CagraSearchPlanner : AlgorithmPlanner { + CagraSearchPlanner(bool topk_by_bitonic_sort, bool bitonic_sort_and_merge_multi_warps) + : AlgorithmPlanner("search_single_cta_kernel_" + bool_to_string(topk_by_bitonic_sort) + "_" + + bool_to_string(bitonic_sort_and_merge_multi_warps), + make_fragment_key()) + { + } + + void add_setup_workspace_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + { + std::string key = "setup_workspace_"; + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + if (is_vpq) { key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } + this->device_functions.push_back(key); + } + + void add_compute_distance_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t 
dataset_block_dim, + bool is_vpq, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + { + std::string key = "compute_distance_"; + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + if (is_vpq) { key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } + this->device_functions.push_back(key); + } + + void add_sample_filter_device_function(std::string filter_name) + { + this->device_functions.push_back("sample_filter_" + filter_name); + } + + private: + static std::string bool_to_string(bool b) { return b ? "true" : "false"; } + + static std::string metric_to_string(cuvs::distance::DistanceType metric) + { + switch (metric) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2Unexpanded: return "L2Expanded"; + case cuvs::distance::DistanceType::InnerProduct: return "InnerProduct"; + case cuvs::distance::DistanceType::CosineExpanded: return "CosineExpanded"; + case cuvs::distance::DistanceType::BitwiseHamming: return "BitwiseHamming"; + default: return "Unknown"; + } + } +}; + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in new file mode 100644 index 0000000000..98399e1355 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Instantiate the setup_workspace_standard function for standard descriptor +template __device__ uint32_t setup_workspace_standard<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( + void*, const @data_type@*, uint32_t, const @data_type@*, @index_type@, uint32_t, uint32_t, const @distance_type@*); + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search + +#else + +#include +#include +#include "setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@", + embedded_setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh new file mode 100644 index 0000000000..5283cc9659 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh @@ -0,0 +1,55 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_standard-impl.cuh" +#include "../device_common.hpp" + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Extern function implementation for setup_workspace_standard (standard descriptor) +template +__device__ uint32_t setup_workspace_standard(void* smem, + const DataT* queries, + uint32_t query_id, + const DataT* dataset_ptr, + IndexT dataset_size, + uint32_t dim, + uint32_t ld, + const DistanceT* dataset_norms) +{ + using desc_type = cuvs::neighbors::cagra::detail:: + standard_dataset_descriptor_t; + + // Create a temporary descriptor on the stack + desc_type temp_desc(reinterpret_cast( + &cuvs::neighbors::cagra::detail::setup_workspace_standard), + reinterpret_cast( + &cuvs::neighbors::cagra::detail::compute_distance_standard), + dataset_ptr, + dataset_size, + dim, + ld, + dataset_norms); + + // Call the free function setup_workspace_standard which copies descriptor to smem + const desc_type* result = cuvs::neighbors::cagra::detail::setup_workspace_standard( + &temp_desc, smem, queries, query_id); + + // Return the smem_ws_ptr from the descriptor's args + return result->args.smem_ws_ptr; +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in new file mode 100644 index 0000000000..6501272572 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Instantiate the setup_workspace_vpq function for VPQ descriptor +template __device__ uint32_t setup_workspace_vpq<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( + void*, const @data_type@*, uint32_t, const uint8_t*, uint32_t, const @codebook_type@*, const @codebook_type@*, @index_type@, uint32_t); + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search + +#else + +#include +#include +#include "setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh new file mode 100644 index 0000000000..f0245030b2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh @@ -0,0 +1,67 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_vpq-impl.cuh" +#include "../device_common.hpp" + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Extern function implementation for setup_workspace_vpq (VPQ descriptor) +template +__device__ uint32_t setup_workspace_vpq(void* smem, + const DataT* queries, + uint32_t query_id, + const uint8_t* encoded_dataset_ptr, + uint32_t encoded_dataset_dim, + const CodebookT* vq_code_book_ptr, + const CodebookT* pq_code_book_ptr, + IndexT dataset_size, + uint32_t dim) +{ + using desc_type = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; + + // Create a temporary descriptor on the stack + desc_type temp_desc(reinterpret_cast( + &cuvs::neighbors::cagra::detail::setup_workspace_vpq), + reinterpret_cast( + &cuvs::neighbors::cagra::detail::compute_distance_vpq), + encoded_dataset_ptr, + encoded_dataset_dim, + vq_code_book_ptr, + pq_code_book_ptr, + dataset_size, + dim); + + // Call the free function setup_workspace_vpq which copies descriptor to smem + const desc_type* result = cuvs::neighbors::cagra::detail::setup_workspace_vpq( + &temp_desc, smem, queries, query_id); + + // Return the smem_ws_ptr from the descriptor's args + return result->args.smem_ws_ptr; +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_explicit_inst.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_explicit_inst.cuh new file mode 100644 index 0000000000..e93d24aaf6 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_explicit_inst.cuh @@ -0,0 +1,12 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#ifdef CUVS_ENABLE_JIT_LTO +#include "search_single_cta_kernel_jit.cuh" +#else +#include "search_single_cta_kernel-inl.cuh" +#endif diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit-inl.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit-inl.cuh new file mode 100644 index 0000000000..56201c2671 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit-inl.cuh @@ -0,0 +1,863 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include "search_single_cta_kernel_jit.cuh" + +#include "bitonic.hpp" +#include "device_common.hpp" +#include "hashmap.hpp" +#include "search_plan.cuh" +#include "topk_by_radix.cuh" +#include "topk_for_cagra/topk.h" +#include "utils.hpp" + +#include +#include +#include +#include +#include +#include + +#include + +#include "../ann_utils.cuh" + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// JIT version of compute_distance_to_random_nodes - uses extern compute_distance +template +RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( + IndexT* __restrict__ result_indices_ptr, + DistanceT* __restrict__ result_distances_ptr, + const DataT* dataset_ptr, + const uint8_t* encoded_dataset_ptr, + uint32_t smem_ws_ptr, + IndexT dataset_size, + uint32_t dim, + uint32_t encoded_dataset_dim, + uint32_t ld, + uint32_t team_size_bitshift, + const DistanceT* dataset_norms, + const CodebookT* vq_code_book_ptr, + const CodebookT* pq_code_book_ptr, + const uint32_t num_pickup, + const uint32_t num_distilation, + const uint64_t rand_xor_mask, + const IndexT* __restrict__ seed_ptr, + const uint32_t 
num_seeds, + IndexT* __restrict__ visited_hash_ptr, + const uint32_t visited_hash_bitlen, + IndexT* __restrict__ traversed_hash_ptr, + const uint32_t traversed_hash_bitlen, + const uint32_t block_id = 0, + const uint32_t num_blocks = 1) +{ + constexpr unsigned warp_size = 32; + const auto max_i = raft::round_up_safe(num_pickup, warp_size >> team_size_bitshift); + + for (uint32_t i = threadIdx.x >> team_size_bitshift; i < max_i; + i += (blockDim.x >> team_size_bitshift)) { + const bool valid_i = (i < num_pickup); + + IndexT best_index_team_local = raft::upper_bound(); + DistanceT best_norm2_team_local = raft::upper_bound(); + for (uint32_t j = 0; j < num_distilation; j++) { + IndexT seed_index = 0; + if (valid_i) { + uint32_t gid = block_id + (num_blocks * (i + (num_pickup * j))); + if (seed_ptr && (gid < num_seeds)) { + seed_index = seed_ptr[gid]; + } else { + seed_index = device::xorshift64(gid ^ rand_xor_mask) % dataset_size; + } + } + + DistanceT norm2 = 0; + if constexpr (DescType == DescriptorType::Standard) { + norm2 = + valid_i + ? compute_distance_standard( + dataset_ptr, smem_ws_ptr, seed_index, dim, ld, team_size_bitshift, dataset_norms) + : 0; + } else if constexpr (DescType == DescriptorType::VPQ) { + norm2 = valid_i ? 
compute_distance_vpq(encoded_dataset_ptr, + smem_ws_ptr, + seed_index, + encoded_dataset_dim, + vq_code_book_ptr, + pq_code_book_ptr, + team_size_bitshift) + : 0; + } + const auto norm2_sum = device::team_sum(norm2, team_size_bitshift); + + if (valid_i && (norm2_sum < best_norm2_team_local)) { + best_norm2_team_local = norm2_sum; + best_index_team_local = seed_index; + } + } + + const unsigned lane_id = threadIdx.x & ((1u << team_size_bitshift) - 1u); + if (valid_i && lane_id == 0) { + if (best_index_team_local != raft::upper_bound()) { + if (hashmap::insert(visited_hash_ptr, visited_hash_bitlen, best_index_team_local) == 0) { + best_norm2_team_local = raft::upper_bound(); + best_index_team_local = raft::upper_bound(); + } else if ((traversed_hash_ptr != nullptr) && + hashmap::search( + traversed_hash_ptr, traversed_hash_bitlen, best_index_team_local)) { + best_norm2_team_local = raft::upper_bound(); + best_index_team_local = raft::upper_bound(); + } + } + result_distances_ptr[i] = best_norm2_team_local; + result_indices_ptr[i] = best_index_team_local; + } + } +} + +// JIT version of compute_distance_to_child_nodes - uses extern compute_distance +template +RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( + IndexT* __restrict__ result_child_indices_ptr, + DistanceT* __restrict__ result_child_distances_ptr, + const DataT* dataset_ptr, + const uint8_t* encoded_dataset_ptr, + uint32_t smem_ws_ptr, + uint32_t dim, + uint32_t encoded_dataset_dim, + uint32_t ld, + uint32_t team_size_bitshift, + const DistanceT* dataset_norms, + const CodebookT* vq_code_book_ptr, + const CodebookT* pq_code_book_ptr, + const IndexT* __restrict__ knn_graph, + const uint32_t knn_k, + IndexT* __restrict__ visited_hashmap_ptr, + const uint32_t visited_hash_bitlen, + IndexT* __restrict__ traversed_hashmap_ptr, + const uint32_t traversed_hash_bitlen, + const IndexT* __restrict__ parent_indices, + const IndexT* __restrict__ internal_topk_list, + const uint32_t search_width, + 
int* __restrict__ result_position = nullptr, + const int max_result_position = 0) +{ + constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask::value; + constexpr IndexT invalid_index = ~static_cast(0); + + // Read child indices of parents from knn graph and check if the distance computation is + // necessary. + for (uint32_t i = threadIdx.x; i < knn_k * search_width; i += blockDim.x) { + const IndexT smem_parent_id = parent_indices[i / knn_k]; + IndexT child_id = invalid_index; + if (smem_parent_id != invalid_index) { + const auto parent_id = internal_topk_list[smem_parent_id] & ~index_msb_1_mask; + child_id = knn_graph[(i % knn_k) + (static_cast(knn_k) * parent_id)]; + } + if (child_id != invalid_index) { + if (hashmap::insert(visited_hashmap_ptr, visited_hash_bitlen, child_id) == 0) { + child_id = invalid_index; + } else if ((traversed_hashmap_ptr != nullptr) && + hashmap::search( + traversed_hashmap_ptr, traversed_hash_bitlen, child_id)) { + child_id = invalid_index; + } + } + if (STATIC_RESULT_POSITION) { + result_child_indices_ptr[i] = child_id; + } else if (child_id != invalid_index) { + int j = atomicSub(result_position, 1) - 1; + result_child_indices_ptr[j] = child_id; + } + } + __syncthreads(); + + // Compute the distance to child nodes using extern compute_distance + constexpr unsigned warp_size = 32; + const auto num_k = knn_k * search_width; + const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bitshift); + const bool lead_lane = (threadIdx.x & ((1u << team_size_bitshift) - 1u)) == 0; + const uint32_t ofst = STATIC_RESULT_POSITION ? 0 : result_position[0]; + for (uint32_t i = threadIdx.x >> team_size_bitshift; i < max_i; + i += blockDim.x >> team_size_bitshift) { + const auto j = i + ofst; + const bool valid_i = STATIC_RESULT_POSITION ? (j < num_k) : (j < max_result_position); + const auto child_id = valid_i ? 
result_child_indices_ptr[j] : invalid_index; + + DistanceT child_dist = 0; + if constexpr (DescType == DescriptorType::Standard) { + child_dist = device::team_sum( + (child_id != invalid_index) + ? compute_distance_standard( + dataset_ptr, smem_ws_ptr, child_id, dim, ld, team_size_bitshift, dataset_norms) + : (lead_lane ? raft::upper_bound() : 0), + team_size_bitshift); + } else if constexpr (DescType == DescriptorType::VPQ) { + child_dist = device::team_sum((child_id != invalid_index) + ? compute_distance_vpq(encoded_dataset_ptr, + smem_ws_ptr, + child_id, + encoded_dataset_dim, + vq_code_book_ptr, + pq_code_book_ptr, + team_size_bitshift) + : (lead_lane ? raft::upper_bound() : 0), + team_size_bitshift); + } + __syncwarp(); + + // Store the distance + if (valid_i && lead_lane) { result_child_distances_ptr[j] = child_dist; } + } +} + +// JIT version of search_core - uses extern functions instead of templated descriptor +template +RAFT_DEVICE_INLINE_FUNCTION void search_core(uintptr_t result_indices_ptr, + DistanceT* const result_distances_ptr, + const std::uint32_t top_k, + const DataT* const queries_ptr, + const IndexT* const knn_graph, + const std::uint32_t graph_degree, + const SourceIndexT* source_indices_ptr, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const IndexT* seed_ptr, + const uint32_t num_seeds, + IndexT* const visited_hashmap_ptr, + const std::uint32_t max_candidates, + const std::uint32_t max_itopk, + const std::uint32_t internal_topk, + const std::uint32_t search_width, + const std::uint32_t min_iteration, + const std::uint32_t max_iteration, + std::uint32_t* const num_executed_iterations, + const std::uint32_t hash_bitlen, + const std::uint32_t small_hash_bitlen, + const std::uint32_t small_hash_reset_interval, + const std::uint32_t query_id, + const DataT* dataset_ptr, + const uint8_t* encoded_dataset_ptr, + IndexT dataset_size, + uint32_t dim, + uint32_t encoded_dataset_dim, + uint32_t ld, + const DistanceT* dataset_norms, 
+ const CodebookT* vq_code_book_ptr, + const CodebookT* pq_code_book_ptr) +{ + using LOAD_T = device::LOAD_128BIT_T; + + auto to_source_index = [source_indices_ptr](IndexT x) { + return source_indices_ptr == nullptr ? static_cast(x) : source_indices_ptr[x]; + }; + +#ifdef _CLK_BREAKDOWN + std::uint64_t clk_init = 0; + std::uint64_t clk_compute_1st_distance = 0; + std::uint64_t clk_topk = 0; + std::uint64_t clk_reset_hash = 0; + std::uint64_t clk_pickup_parents = 0; + std::uint64_t clk_restore_hash = 0; + std::uint64_t clk_compute_distance = 0; + std::uint64_t clk_start; +#define _CLK_START() clk_start = clock64() +#define _CLK_REC(V) V += clock64() - clk_start; +#else +#define _CLK_START() +#define _CLK_REC(V) +#endif + _CLK_START(); + + extern __shared__ uint8_t smem[]; + + // Layout of result_buffer + const auto result_buffer_size = internal_topk + (search_width * graph_degree); + const auto result_buffer_size_32 = raft::round_up_safe(result_buffer_size, 32); + const auto small_hash_size = hashmap::get_size(small_hash_bitlen); + + // Compute smem_ws_size_in_bytes based on descriptor type + uint32_t smem_ws_size_in_bytes = 0; + if constexpr (DescType == DescriptorType::Standard) { + using desc_type = cuvs::neighbors::cagra::detail:: + standard_dataset_descriptor_t; + using QUERY_T = typename desc_type::QUERY_T; + smem_ws_size_in_bytes = + sizeof(desc_type) + raft::round_up_safe(dim, DatasetBlockDim) * sizeof(QUERY_T); + } else if constexpr (DescType == DescriptorType::VPQ) { + using desc_type = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; + using QUERY_T = typename desc_type::QUERY_T; + constexpr uint32_t kSMemCodeBookSizeInBytes = (1 << PQ_BITS) * PQ_LEN * sizeof(CodebookT); + smem_ws_size_in_bytes = sizeof(desc_type) + kSMemCodeBookSizeInBytes + + raft::round_up_safe(dim, DatasetBlockDim) * sizeof(QUERY_T); + } + + // Set smem working buffer for the distance calculation using extern function + uint32_t smem_ws_ptr = 0; + if constexpr (DescType 
== DescriptorType::Standard) { + smem_ws_ptr = + setup_workspace_standard( + smem, queries_ptr, query_id, dataset_ptr, dataset_size, dim, ld, dataset_norms); + } else if constexpr (DescType == DescriptorType::VPQ) { + smem_ws_ptr = setup_workspace_vpq(smem, + queries_ptr, + query_id, + encoded_dataset_ptr, + encoded_dataset_dim, + vq_code_book_ptr, + pq_code_book_ptr, + dataset_size, + dim); + } + + auto* __restrict__ result_indices_buffer = + reinterpret_cast(smem + smem_ws_size_in_bytes); + auto* __restrict__ result_distances_buffer = + reinterpret_cast(result_indices_buffer + result_buffer_size_32); + auto* __restrict__ visited_hash_buffer = + reinterpret_cast(result_distances_buffer + result_buffer_size_32); + auto* __restrict__ parent_list_buffer = + reinterpret_cast(visited_hash_buffer + small_hash_size); + auto* __restrict__ topk_ws = reinterpret_cast(parent_list_buffer + search_width); + auto* terminate_flag = reinterpret_cast(topk_ws + 3); + auto* __restrict__ smem_work_ptr = reinterpret_cast(terminate_flag + 1); + + // A flag for filtering. + auto filter_flag = terminate_flag; + + if (threadIdx.x == 0) { + terminate_flag[0] = 0; + topk_ws[0] = ~0u; + } + + // Init hashmap + IndexT* local_visited_hashmap_ptr; + if (small_hash_bitlen) { + local_visited_hashmap_ptr = visited_hash_buffer; + } else { + local_visited_hashmap_ptr = visited_hashmap_ptr + (hashmap::get_size(hash_bitlen) * blockIdx.y); + } + hashmap::init(local_visited_hashmap_ptr, hash_bitlen, 0); + __syncthreads(); + _CLK_REC(clk_init); + + // compute distance to randomly selecting nodes using JIT version + _CLK_START(); + const IndexT* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; + constexpr uint32_t team_size_bits = raft::Pow2::Log2; + compute_distance_to_random_nodes_jit(result_indices_buffer, + result_distances_buffer, + dataset_ptr, + encoded_dataset_ptr, + smem_ws_ptr, + dataset_size, + dim, + encoded_dataset_dim, + ld, + team_size_bits, + dataset_norms, + vq_code_book_ptr, + pq_code_book_ptr, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0); + __syncthreads(); + _CLK_REC(clk_compute_1st_distance); + + std::uint32_t iter = 0; + while (1) { + // sort + if constexpr (TOPK_BY_BITONIC_SORT) { + assert(blockDim.x >= 64); + const bool bitonic_sort_and_full_multi_warps = (max_candidates > 128) ? true : false; + + // reset small-hash table. + if ((iter + 1) % small_hash_reset_interval == 0) { + _CLK_START(); + unsigned hash_start_tid; + if (blockDim.x == 32) { + hash_start_tid = 0; + } else if (blockDim.x == 64) { + if (bitonic_sort_and_full_multi_warps || BITONIC_SORT_AND_MERGE_MULTI_WARPS) { + hash_start_tid = 0; + } else { + hash_start_tid = 32; + } + } else { + if (bitonic_sort_and_full_multi_warps || BITONIC_SORT_AND_MERGE_MULTI_WARPS) { + hash_start_tid = 64; + } else { + hash_start_tid = 32; + } + } + hashmap::init(local_visited_hashmap_ptr, hash_bitlen, hash_start_tid); + _CLK_REC(clk_reset_hash); + } + + // topk with bitonic sort + _CLK_START(); + // For JIT version, we always check filter_flag at runtime since sample_filter is extern + if (*filter_flag != 0) { + // Move the filtered out index to the end of the itopk list + for (unsigned i = 0; i < search_width; i++) { + move_invalid_to_end_of_list( + result_indices_buffer, result_distances_buffer, internal_topk); + } + if (threadIdx.x == 0) { *terminate_flag = 0; } + } + topk_by_bitonic_sort_and_merge( + result_distances_buffer, + result_indices_buffer, + max_itopk, + internal_topk, + result_distances_buffer + internal_topk, + 
result_indices_buffer + internal_topk, + max_candidates, + search_width * graph_degree, + topk_ws, + (iter == 0)); + __syncthreads(); + _CLK_REC(clk_topk); + } else { + _CLK_START(); + // topk with radix block sort + topk_by_radix_sort{}(max_itopk, + internal_topk, + result_buffer_size, + reinterpret_cast(result_distances_buffer), + result_indices_buffer, + reinterpret_cast(result_distances_buffer), + result_indices_buffer, + nullptr, + topk_ws, + true, + smem_work_ptr); + _CLK_REC(clk_topk); + + // reset small-hash table + if ((iter + 1) % small_hash_reset_interval == 0) { + _CLK_START(); + hashmap::init(local_visited_hashmap_ptr, hash_bitlen); + _CLK_REC(clk_reset_hash); + } + } + __syncthreads(); + + if (iter + 1 == max_iteration) { break; } + + // pick up next parents + if (threadIdx.x < 32) { + _CLK_START(); + pickup_next_parents( + terminate_flag, parent_list_buffer, result_indices_buffer, internal_topk, search_width); + _CLK_REC(clk_pickup_parents); + } + + // restore small-hash table by putting internal-topk indices in it + if ((iter + 1) % small_hash_reset_interval == 0) { + const unsigned first_tid = ((blockDim.x <= 32) ? 
0 : 32); + _CLK_START(); + hashmap_restore( + local_visited_hashmap_ptr, hash_bitlen, result_indices_buffer, internal_topk, first_tid); + _CLK_REC(clk_restore_hash); + } + __syncthreads(); + + if (*terminate_flag && iter >= min_iteration) { break; } + + // compute the norms between child nodes and query node using JIT version + _CLK_START(); + compute_distance_to_child_nodes_jit(result_indices_buffer + internal_topk, + result_distances_buffer + internal_topk, + dataset_ptr, + encoded_dataset_ptr, + smem_ws_ptr, + dim, + encoded_dataset_dim, + ld, + team_size_bits, + dataset_norms, + vq_code_book_ptr, + pq_code_book_ptr, + knn_graph, + graph_degree, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0, + parent_list_buffer, + result_indices_buffer, + search_width); + __syncthreads(); + _CLK_REC(clk_compute_distance); + + // Filtering - use extern sample_filter function + if (threadIdx.x == 0) { *filter_flag = 0; } + __syncthreads(); + + constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const IndexT invalid_index = utils::get_max_value(); + + for (unsigned p = threadIdx.x; p < search_width; p += blockDim.x) { + if (parent_list_buffer[p] != invalid_index) { + const auto parent_id = result_indices_buffer[parent_list_buffer[p]] & ~index_msb_1_mask; + if (!sample_filter(query_id, to_source_index(parent_id))) { + result_distances_buffer[parent_list_buffer[p]] = utils::get_max_value(); + result_indices_buffer[parent_list_buffer[p]] = invalid_index; + *filter_flag = 1; + } + } + } + __syncthreads(); + + iter++; + } + + // Post process for filtering - use extern sample_filter function + constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const IndexT invalid_index = utils::get_max_value(); + + for (unsigned i = threadIdx.x; i < internal_topk + search_width * graph_degree; i += blockDim.x) { + const auto node_id = result_indices_buffer[i] & ~index_msb_1_mask; + if (node_id != (invalid_index & ~index_msb_1_mask) && + 
!sample_filter(query_id, to_source_index(node_id))) { + result_distances_buffer[i] = utils::get_max_value(); + result_indices_buffer[i] = invalid_index; + } + } + + __syncthreads(); + // Move invalid index items to the end of the buffer without sorting the entire buffer + using scan_op_t = cub::WarpScan; + auto& temp_storage = *reinterpret_cast(smem_work_ptr); + + constexpr std::uint32_t warp_size = 32; + if (threadIdx.x < warp_size) { + std::uint32_t num_found_valid = 0; + for (std::uint32_t buffer_offset = 0; buffer_offset < internal_topk; + buffer_offset += warp_size) { + const auto src_position = buffer_offset + threadIdx.x; + const std::uint32_t is_valid_index = + (result_indices_buffer[src_position] & (~index_msb_1_mask)) == invalid_index ? 0 : 1; + std::uint32_t new_position; + scan_op_t(temp_storage).InclusiveSum(is_valid_index, new_position); + if (is_valid_index) { + const auto dst_position = num_found_valid + (new_position - 1); + result_indices_buffer[dst_position] = result_indices_buffer[src_position]; + result_distances_buffer[dst_position] = result_distances_buffer[src_position]; + } + + num_found_valid += new_position; + for (std::uint32_t offset = (warp_size >> 1); offset > 0; offset >>= 1) { + const auto v = raft::shfl_xor(num_found_valid, offset); + if ((threadIdx.x & offset) == 0) { num_found_valid = v; } + } + + if (num_found_valid >= top_k) { break; } + } + + if (num_found_valid < top_k) { + for (std::uint32_t i = num_found_valid + threadIdx.x; i < internal_topk; i += warp_size) { + result_indices_buffer[i] = invalid_index; + result_distances_buffer[i] = utils::get_max_value(); + } + } + } + + // If the sufficient number of valid indexes are not in the internal topk, pick up from the + // candidate list. 
+ if (top_k > internal_topk || result_indices_buffer[top_k - 1] == invalid_index) { + __syncthreads(); + topk_by_bitonic_sort_and_merge( + result_distances_buffer, + result_indices_buffer, + max_itopk, + internal_topk, + result_distances_buffer + internal_topk, + result_indices_buffer + internal_topk, + max_candidates, + search_width * graph_degree, + topk_ws, + (iter == 0)); + } + __syncthreads(); + + // NB: The indices pointer is tagged with its element size. + const uint32_t index_element_tag = result_indices_ptr & 0x3; + result_indices_ptr ^= index_element_tag; + auto write_indices = + index_element_tag == 3 + ? [](uintptr_t ptr, + uint32_t i, + SourceIndexT x) { reinterpret_cast(ptr)[i] = static_cast(x); } + : index_element_tag == 2 + ? [](uintptr_t ptr, + uint32_t i, + SourceIndexT x) { reinterpret_cast(ptr)[i] = static_cast(x); } + : index_element_tag == 1 + ? [](uintptr_t ptr, + uint32_t i, + SourceIndexT x) { reinterpret_cast(ptr)[i] = static_cast(x); } + : [](uintptr_t ptr, uint32_t i, SourceIndexT x) { + reinterpret_cast(ptr)[i] = static_cast(x); + }; + for (std::uint32_t i = threadIdx.x; i < top_k; i += blockDim.x) { + unsigned j = i + (top_k * query_id); + unsigned ii = i; + if constexpr (TOPK_BY_BITONIC_SORT) { ii = device::swizzling(i); } + if (result_distances_ptr != nullptr) { result_distances_ptr[j] = result_distances_buffer[ii]; } + + auto internal_index = + result_indices_buffer[ii] & ~index_msb_1_mask; // clear most significant bit + auto source_index = to_source_index(internal_index); + write_indices(result_indices_ptr, j, source_index); + } + if (threadIdx.x == 0 && num_executed_iterations != nullptr) { + num_executed_iterations[query_id] = iter + 1; + } +#ifdef _CLK_BREAKDOWN + if ((threadIdx.x == 0 || threadIdx.x == blockDim.x - 1) && ((query_id * 3) % gridDim.y < 3)) { + printf( + "%s:%d " + "query, %d, thread, %d" + ", init, %lu" + ", 1st_distance, %lu" + ", topk, %lu" + ", reset_hash, %lu" + ", pickup_parents, %lu" + ", restore_hash, 
%lu" + ", distance, %lu" + "\n", + __FILE__, + __LINE__, + query_id, + threadIdx.x, + clk_init, + clk_compute_1st_distance, + clk_topk, + clk_reset_hash, + clk_pickup_parents, + clk_restore_hash, + clk_compute_distance); + } +#endif +} + +// JIT kernel wrapper - calls search_core +template +RAFT_KERNEL __launch_bounds__(1024, 1) + search_kernel_jit(uintptr_t result_indices_ptr, + DistanceT* const result_distances_ptr, + const std::uint32_t top_k, + const DataT* const queries_ptr, + const IndexT* const knn_graph, + const std::uint32_t graph_degree, + const SourceIndexT* source_indices_ptr, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const IndexT* seed_ptr, + const uint32_t num_seeds, + IndexT* const visited_hashmap_ptr, + const std::uint32_t max_candidates, + const std::uint32_t max_itopk, + const std::uint32_t internal_topk, + const std::uint32_t search_width, + const std::uint32_t min_iteration, + const std::uint32_t max_iteration, + std::uint32_t* const num_executed_iterations, + const std::uint32_t hash_bitlen, + const std::uint32_t small_hash_bitlen, + const std::uint32_t small_hash_reset_interval, + const DataT* dataset_ptr, + const uint8_t* encoded_dataset_ptr, + IndexT dataset_size, + uint32_t dim, + uint32_t encoded_dataset_dim, + uint32_t ld, + const DistanceT* dataset_norms, + const CodebookT* vq_code_book_ptr, + const CodebookT* pq_code_book_ptr, + SampleFilterT sample_filter) +{ + const auto query_id = blockIdx.y; + search_core(result_indices_ptr, + result_distances_ptr, + top_k, + queries_ptr, + knn_graph, + graph_degree, + source_indices_ptr, + num_distilation, + rand_xor_mask, + seed_ptr, + num_seeds, + visited_hashmap_ptr, + max_candidates, + max_itopk, + internal_topk, + search_width, + min_iteration, + max_iteration, + num_executed_iterations, + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + query_id, + dataset_ptr, + encoded_dataset_ptr, + dataset_size, + dim, + encoded_dataset_dim, + ld, + dataset_norms, 
+ vq_code_book_ptr, + pq_code_book_ptr); +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit.cuh new file mode 100644 index 0000000000..dd182729ae --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit.cuh @@ -0,0 +1,106 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "compute_distance-ext.cuh" +#include "device_common.hpp" +#include "hashmap.hpp" +#include "utils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Enum to distinguish between descriptor types +enum class DescriptorType { Standard, VPQ }; + +// These extern device functions are linked at runtime using JIT-LTO. +// They are templated on descriptor parameters (not DescriptorT) and create +// descriptor instances internally. 
+ +// Standard descriptor extern functions +template +extern __device__ uint32_t setup_workspace_standard(void* smem, + const DataT* queries, + uint32_t query_id, + const DataT* dataset_ptr, + IndexT dataset_size, + uint32_t dim, + uint32_t ld, + const DistanceT* dataset_norms = nullptr); + +template +extern __device__ DistanceT compute_distance_standard(const DataT* dataset_ptr, + uint32_t smem_ws_ptr, + IndexT dataset_index, + uint32_t dim, + uint32_t ld, + uint32_t team_size_bitshift, + const DistanceT* dataset_norms = nullptr); + +// VPQ descriptor extern functions +template +extern __device__ uint32_t setup_workspace_vpq(void* smem, + const DataT* queries, + uint32_t query_id, + const uint8_t* encoded_dataset_ptr, + uint32_t encoded_dataset_dim, + const CodebookT* vq_code_book_ptr, + const CodebookT* pq_code_book_ptr, + IndexT dataset_size, + uint32_t dim); + +template +extern __device__ DistanceT compute_distance_vpq(const uint8_t* encoded_dataset_ptr, + uint32_t smem_ws_ptr, + IndexT dataset_index, + uint32_t encoded_dataset_dim, + const CodebookT* vq_code_book_ptr, + const CodebookT* pq_code_book_ptr, + uint32_t team_size_bitshift); + +// Sample filter extern function +template +extern __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id); + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search + +// Include the implementation +#include "search_single_cta_kernel_jit-inl.cuh" From 533b770dc6b7afbc8cd768d0d94e1b9170427e65 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 11 Feb 2026 06:05:15 +0000 Subject: [PATCH 087/158] address review and move --- conda/recipes/libcuvs/recipe.yaml | 9 +++++++++ cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 2 +- .../jit_lto_kernels/interleaved_scan_kernel.cu.in | 2 +- .../ivf_flat_interleaved_scan_kernel.cuh | 0 4 files changed, 11 insertions(+), 2 deletions(-) rename cpp/src/neighbors/ivf_flat/{ => jit_lto_kernels}/ivf_flat_interleaved_scan_kernel.cuh (100%) diff --git 
a/conda/recipes/libcuvs/recipe.yaml b/conda/recipes/libcuvs/recipe.yaml index d646195016..abd3031a94 100644 --- a/conda/recipes/libcuvs/recipe.yaml +++ b/conda/recipes/libcuvs/recipe.yaml @@ -423,6 +423,9 @@ outputs: - libcurand-dev - libcusolver-dev - libcusparse-dev + - if: cuda_major == "13" + then: + - libnvjitlink-dev run: - ${{ pin_subpackage("libcuvs-headers", exact=True) }} - ${{ pin_subpackage("libcuvs", exact=True) }} @@ -433,6 +436,9 @@ outputs: - libcurand - libcusolver - libcusparse + - if: cuda_major == "13" + then: + - libnvjitlink ignore_run_exports: by_name: - cuda-cudart @@ -443,6 +449,9 @@ outputs: - libcurand - libcusolver - libcusparse + - if: cuda_major == "13" + then: + - libnvjitlink - librmm - mkl - nccl diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 93d1f205e7..0983267e04 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -103,7 +103,7 @@ std::shared_ptr AlgorithmPlanner::build() check_nvjitlink_result(handle, result); result = nvJitLinkDestroy(&handle); - check_nvjitlink_result(handle, result); + RAFT_EXPECTS(result == NVJITLINK_SUCCESS, "nvJitLinkDestroy failed"); // cubin is linked, so now load it // NOTE: cudaLibrary_t does not need to be freed explicitly diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in index 8c972221bc..5e75253939 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in @@ -7,7 +7,7 @@ #ifdef BUILD_KERNEL -#include +#include namespace cuvs::neighbors::ivf_flat::detail { diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh similarity index 100% rename from 
cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_kernel.cuh rename to cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh From 78c59d984e458f64c22ba00ac07aa1170eb40215 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 11 Feb 2026 06:06:47 +0000 Subject: [PATCH 088/158] one more fix --- .../jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh index 907638b3ff..21fa1951f6 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh @@ -619,7 +619,7 @@ struct loadAndComputeDist { int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); compute_dist<2, int8_t, int32_t>(dist, q, encV); if constexpr (ComputeNorm) { - norm_query = raft::dp4a(queryReg, queryReg, norm_query); + norm_query = raft::dp4a(q, q, norm_query); norm_data = raft::dp4a(encV, encV, norm_data); } } From 7f8802b80826d3197be33a65465c7470775b66f1 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 11 Feb 2026 06:10:48 +0000 Subject: [PATCH 089/158] correct path --- .../jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh index 21fa1951f6..3a14fe8afd 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh @@ -5,7 +5,7 @@ #pragma once -#include "../ivf_common.cuh" +#include "../../ivf_common.cuh" #include From 39ce9e3066739d68e3a2ab1bb1b146ef7a093124 Mon Sep 17 00:00:00 2001 
From: Divye Gala Date: Fri, 13 Feb 2026 17:51:28 +0000 Subject: [PATCH 090/158] in the middle of stuff --- cpp/CMakeLists.txt | 9 + .../modules/generate_jit_lto_kernels.cmake | 567 ++++++++++- .../cuvs/detail/jit_lto/AlgorithmLauncher.hpp | 10 + cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 43 + cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 104 +- .../jit_lto/NVRTCLTOFragmentCompiler.cu | 2 +- .../neighbors/detail/cagra/cagra_search.cuh | 1 + .../detail/cagra/compute_distance.hpp | 25 +- .../cagra/compute_distance_standard-impl.cuh | 32 +- .../cagra/compute_distance_vpq-impl.cuh | 46 +- .../neighbors/detail/cagra/device_common.hpp | 15 +- .../jit_lto_kernels/apply_filter_kernel.cu.in | 39 + .../compute_distance_standard.cu.in | 10 +- .../compute_distance_standard_impl.cuh | 40 +- ...mpute_distance_to_child_nodes_kernel.cu.in | 42 + ...e_distance_to_child_nodes_kernel_vpq.cu.in | 43 + .../compute_distance_vpq.cu.in | 11 +- .../compute_distance_vpq_impl.cuh | 60 +- .../jit_lto_kernels/device_common_jit.cuh | 261 +++++ .../extern_device_functions.cuh | 121 +++ .../detail/cagra/jit_lto_kernels/filter.cu.in | 32 + .../cagra/jit_lto_kernels/filter_bitset.cuh | 36 + .../cagra/jit_lto_kernels/filter_data.h | 27 + .../cagra/jit_lto_kernels/filter_none.cuh | 20 + .../random_pickup_kernel.cu.in | 42 + .../random_pickup_kernel_vpq.cu.in | 43 + .../search_multi_cta_helpers.cuh | 138 +++ .../search_multi_cta_kernel.cu.in | 50 + .../search_multi_cta_kernel_jit.cuh | 451 +++++++++ .../search_multi_cta_kernel_vpq.cu.in | 51 + .../search_multi_cta_planner.hpp | 142 +++ .../search_multi_kernel_jit.cuh | 349 +++++++ .../search_multi_kernel_planner.hpp | 144 +++ .../search_single_cta_kernel.cu.in | 42 + .../search_single_cta_kernel_jit.cuh | 728 ++++++++++++++ .../search_single_cta_kernel_p.cu.in | 42 + .../search_single_cta_kernel_p_vpq.cu.in | 43 + .../search_single_cta_kernel_vpq.cu.in | 43 + .../search_single_cta_planner.hpp | 121 ++- .../setup_workspace_standard.cu.in | 9 +- 
.../setup_workspace_standard_impl.cuh | 51 +- .../jit_lto_kernels/setup_workspace_vpq.cu.in | 10 +- .../setup_workspace_vpq_impl.cuh | 80 +- .../cagra/search_multi_cta_kernel-inl.cuh | 71 +- .../search_multi_cta_kernel_launcher_jit.cuh | 283 ++++++ .../detail/cagra/search_multi_kernel.cuh | 179 +++- .../search_multi_kernel_launcher_jit.cuh | 323 ++++++ .../detail/cagra/search_single_cta.cuh | 1 + .../detail/cagra/search_single_cta_inst.cuh | 7 +- .../cagra/search_single_cta_kernel-inl.cuh | 131 --- ...search_single_cta_kernel_explicit_inst.cuh | 4 +- .../search_single_cta_kernel_jit-inl.cuh | 863 ---------------- .../cagra/search_single_cta_kernel_jit.cuh | 106 -- .../search_single_cta_kernel_launcher.cuh | 117 +++ ...arch_single_cta_kernel_launcher_common.cuh | 63 ++ .../search_single_cta_kernel_launcher_jit.cuh | 943 ++++++++++++++++++ .../detail/cagra/set_value_batch.cuh | 40 + 57 files changed, 5915 insertions(+), 1391 deletions(-) create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_none.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in create mode 100644 
cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_helpers.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh create mode 100644 cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit-inl.cuh delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit.cuh create mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher.cuh create mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_common.cuh create mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh create mode 100644 cpp/src/neighbors/detail/cagra/set_value_batch.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a9938de7da..53a52105b6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -317,6 
+317,10 @@ if(NOT BUILD_CPU_ONLY) CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON ) + target_compile_definitions( + cuvs-cagra-search PRIVATE $<$:CUVS_BUILD_CAGRA_HNSWLIB> + $<$:NVTX_ENABLED> + ) target_link_libraries( cuvs-cagra-search PRIVATE cuvs::cuvs_cpp_headers $ @@ -351,6 +355,11 @@ if(NOT BUILD_CPU_ONLY) set(JIT_LTO_COMPILATION ON) endif() + # Set JIT LTO compile definition for cuvs-cagra-search AFTER JIT_LTO_COMPILATION is determined + target_compile_definitions( + cuvs-cagra-search PRIVATE $<$:CUVS_ENABLE_JIT_LTO> + ) + if(JIT_LTO_COMPILATION) # Generate interleaved scan kernel files at build time include(cmake/modules/generate_jit_lto_kernels.cmake) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 53095fc425..f227a10520 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -105,7 +105,7 @@ function(parse_jit_lto_data_type_configs config) endif() endfunction() -# cmake-lint: disable=R0915 +# cmake-lint: disable=R0915,R0912 function(generate_jit_lto_kernels target) add_library(${target} STATIC) target_include_directories( @@ -225,8 +225,8 @@ function(generate_jit_lto_kernels target) set(cagra_index_abbrev "ui") set(cagra_distance_type "float") set(cagra_distance_abbrev "f") - set(cagra_metrics "L2Expanded" "InnerProduct" "CosineExpanded") - set(cagra_metric_abbrevs "l2" "ip" "cos") + set(cagra_metrics "L2Expanded" "InnerProduct" "CosineExpanded" "BitwiseHamming") + set(cagra_metric_abbrevs "l2" "ip" "cos" "hamming") set(cagra_team_sizes 8 16 32) set(cagra_dataset_block_dims 128 256 512) set(cagra_pq_bits 8) @@ -237,17 +237,39 @@ function(generate_jit_lto_kernels target) foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) - foreach(metric_idx IN ITEMS 0 1 2) + foreach(metric_idx IN ITEMS 0 1 2 3) list(GET cagra_metrics 
${metric_idx} metric) list(GET cagra_metric_abbrevs ${metric_idx} metric_name) + # Map metric abbreviation to full name used by planner's metric_to_string() + if(metric_name STREQUAL "l2") + set(metric_name_full "L2Expanded") + set(metric_tag "l2") + elseif(metric_name STREQUAL "ip") + set(metric_name_full "InnerProduct") + set(metric_tag "inner_product") + elseif(metric_name STREQUAL "cos") + set(metric_name_full "CosineExpanded") + set(metric_tag "cosine") + elseif(metric_name STREQUAL "hamming") + set(metric_name_full "BitwiseHamming") + set(metric_tag "hamming") + # BitwiseHamming is only supported for uint8_t (data_idx=2) + if(NOT data_idx EQUAL 2) + continue() + endif() + else() + set(metric_name_full "${metric_name}") + set(metric_tag "${metric_name}") + endif() foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) # setup_workspace_standard set(kernel_name - "setup_workspace_standard_${metric_name}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + "setup_workspace_standard_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") set(metric_cpp "cuvs::distance::DistanceType::${metric}") + set(metric_name "${metric_name_full}") set(data_type "${data_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") @@ -268,10 +290,11 @@ function(generate_jit_lto_kernels target) # compute_distance_standard set(kernel_name - "compute_distance_standard_${metric_name}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + "compute_distance_standard_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename 
"${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") set(metric_cpp "cuvs::distance::DistanceType::${metric}") + set(metric_name "${metric_name_full}") set(data_type "${data_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") @@ -308,6 +331,7 @@ function(generate_jit_lto_kernels target) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") set(metric_name "l2") + set(metric_tag "l2") set(pq_bits "${cagra_pq_bits}") set(codebook_type "${cagra_codebook_type}") set(data_type "${data_type}") @@ -335,6 +359,7 @@ function(generate_jit_lto_kernels target) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") set(metric_name "l2") + set(metric_tag "l2") set(pq_bits "${cagra_pq_bits}") set(codebook_type "${cagra_codebook_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -358,4 +383,534 @@ function(generate_jit_lto_kernels target) endforeach() endforeach() endforeach() + + # Generate CAGRA kernel entrypoint fragments These are the main kernel entrypoints that call the + # device functions + set(cagra_topk_by_bitonic_sort_options "true" "false") + set(cagra_bitonic_sort_and_merge_multi_warps_options "true" "false") + set(cagra_topk_by_bitonic_sort_str_options "true" "false") + set(cagra_bitonic_sort_and_merge_multi_warps_str_options "true" "false") + + # For kernel instantiation, we need to provide template parameters The actual + # metric/team_size/dataset_block_dim used at runtime are determined via device functions We use + # default values for the template instantiation - these don't affect runtime behavior + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(metric_idx IN ITEMS 0 1 2 3) + list(GET cagra_metrics 
${metric_idx} metric) + list(GET cagra_metric_abbrevs ${metric_idx} metric_name) + # Map metric abbreviation to full name used by planner's metric_to_string() + if(metric_name STREQUAL "l2") + set(metric_name_full "L2Expanded") + set(metric_tag "l2") + elseif(metric_name STREQUAL "ip") + set(metric_name_full "InnerProduct") + set(metric_tag "inner_product") + elseif(metric_name STREQUAL "cos") + set(metric_name_full "CosineExpanded") + set(metric_tag "cosine") + elseif(metric_name STREQUAL "hamming") + set(metric_name_full "BitwiseHamming") + set(metric_tag "hamming") + # BitwiseHamming is only supported for uint8_t (data_idx=2) + if(NOT data_idx EQUAL 2) + continue() + endif() + else() + set(metric_name_full "${metric_name}") + set(metric_tag "${metric_name}") + endif() + foreach(topk_idx IN ITEMS 0 1) + list(GET cagra_topk_by_bitonic_sort_options ${topk_idx} topk_by_bitonic_sort) + list(GET cagra_topk_by_bitonic_sort_str_options ${topk_idx} topk_by_bitonic_sort_str) + foreach(merge_idx IN ITEMS 0 1) + list(GET cagra_bitonic_sort_and_merge_multi_warps_options ${merge_idx} + bitonic_sort_and_merge_multi_warps + ) + list(GET cagra_bitonic_sort_and_merge_multi_warps_str_options ${merge_idx} + bitonic_sort_and_merge_multi_warps_str + ) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # Regular kernel entrypoint - generate for each combination + set(kernel_name + "search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + ) + set(filename + "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" + ) + set(metric "${metric}") + set(metric_cpp "cuvs::distance::DistanceType::${metric}") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type "${cagra_index_type}") + 
set(distance_type "${cagra_distance_type}") + set(source_index_type "${cagra_index_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(src_idx_abbrev "${cagra_index_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # Persistent kernel entrypoint - generate for each combination + set(kernel_name + "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + ) + set(filename + "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" + ) + set(metric "${metric}") + set(metric_cpp "cuvs::distance::DistanceType::${metric}") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(source_index_type "${cagra_index_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(src_idx_abbrev "${cagra_index_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + endforeach() + endforeach() + endforeach() 
+ + # Generate single_cta VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to + # team_size and dataset_block_dim + foreach(data_idx IN ITEMS 0 1) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(topk_idx IN ITEMS 0 1) + list(GET cagra_topk_by_bitonic_sort_options ${topk_idx} topk_by_bitonic_sort) + list(GET cagra_topk_by_bitonic_sort_str_options ${topk_idx} topk_by_bitonic_sort_str) + foreach(merge_idx IN ITEMS 0 1) + list(GET cagra_bitonic_sort_and_merge_multi_warps_options ${merge_idx} + bitonic_sort_and_merge_multi_warps + ) + list(GET cagra_bitonic_sort_and_merge_multi_warps_str_options ${merge_idx} + bitonic_sort_and_merge_multi_warps_str + ) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + foreach(pq_len IN LISTS cagra_pq_lens) + # Regular VPQ kernel entrypoint + set(kernel_name + "search_single_cta_kernel_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + ) + set(filename + "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" + ) + set(metric "L2Expanded") + set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") + set(metric_name_full "L2Expanded") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "${cagra_pq_bits}") + set(pq_len "${pq_len}") + set(codebook_type "${cagra_codebook_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(source_index_type "${cagra_index_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(src_idx_abbrev "${cagra_index_abbrev}") + configure_file( + 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # Persistent VPQ kernel entrypoint + set(kernel_name + "search_single_cta_kernel_p_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + ) + set(filename + "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" + ) + set(metric "L2Expanded") + set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") + set(metric_name_full "L2Expanded") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "${cagra_pq_bits}") + set(pq_len "${pq_len}") + set(codebook_type "${cagra_codebook_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(source_index_type "${cagra_index_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(src_idx_abbrev "${cagra_index_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + endforeach() + endforeach() + endforeach() + + # Generate multi_cta kernel entrypoints Multi_cta kernels don't use 
topk_by_bitonic_sort or + # bitonic_sort_and_merge_multi_warps as template parameters (those are handled inside the kernel + # based on max_elements) IMPORTANT: Need to generate kernels for all combinations of team_size and + # dataset_block_dim because the kernel template uses DescriptorT::kTeamSize and + # DescriptorT::kDatasetBlockDim as template parameters when calling + # setup_workspace_standard/compute_distance_standard + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(metric_idx IN ITEMS 0 1 2 3) + list(GET cagra_metrics ${metric_idx} metric) + list(GET cagra_metric_abbrevs ${metric_idx} metric_name) + if(metric_name STREQUAL "l2") + set(metric_name_full "L2Expanded") + set(metric_tag "l2") + elseif(metric_name STREQUAL "ip") + set(metric_name_full "InnerProduct") + set(metric_tag "inner_product") + elseif(metric_name STREQUAL "cos") + set(metric_name_full "CosineExpanded") + set(metric_tag "cosine") + elseif(metric_name STREQUAL "hamming") + set(metric_name_full "BitwiseHamming") + set(metric_tag "hamming") + # BitwiseHamming is only supported for uint8_t (data_idx=2) + if(NOT data_idx EQUAL 2) + continue() + endif() + else() + set(metric_name_full "${metric_name}") + set(metric_tag "${metric_name}") + endif() + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # Multi_cta kernel entrypoint - generate for each combination + set(kernel_name + "search_multi_cta_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(metric "${metric}") + set(metric_cpp "cuvs::distance::DistanceType::${metric_name_full}") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type 
"${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(source_index_type "${cagra_index_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(src_idx_abbrev "${cagra_index_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + endforeach() + + # Generate multi_cta VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to + # team_size and dataset_block_dim VPQ is supported for all data types (float, half, int8_t, + # uint8_t) + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + foreach(pq_len IN LISTS cagra_pq_lens) + # Multi_cta VPQ kernel entrypoint - generate for each combination + set(kernel_name + "search_multi_cta_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(metric "L2Expanded") + set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") + set(metric_name_full "L2Expanded") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "${cagra_pq_bits}") + set(pq_len "${pq_len}") + set(codebook_type "${cagra_codebook_type}") + set(index_type "${cagra_index_type}") + set(distance_type 
"${cagra_distance_type}") + set(source_index_type "${cagra_index_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(src_idx_abbrev "${cagra_index_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + endforeach() + + # Generate multi_kernel kernel entrypoints Multi_kernel has two separate kernels: + # random_pickup_kernel and compute_distance_to_child_nodes_kernel + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(metric_idx IN ITEMS 0 1 2 3) + list(GET cagra_metrics ${metric_idx} metric) + list(GET cagra_metric_abbrevs ${metric_idx} metric_name) + if(metric_name STREQUAL "l2") + set(metric_name_full "L2Expanded") + set(metric_tag "l2") + elseif(metric_name STREQUAL "ip") + set(metric_name_full "InnerProduct") + set(metric_tag "inner_product") + elseif(metric_name STREQUAL "cos") + set(metric_name_full "CosineExpanded") + set(metric_tag "cosine") + elseif(metric_name STREQUAL "hamming") + set(metric_name_full "BitwiseHamming") + set(metric_tag "hamming") + # BitwiseHamming is only supported for uint8_t (data_idx=2) + if(NOT data_idx EQUAL 2) + continue() + endif() + else() + set(metric_name_full "${metric_name}") + set(metric_tag "${metric_name}") + endif() + + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # random_pickup_kernel entrypoint - generate for each combination + set(kernel_name + 
"random_pickup_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(metric "${metric}") + set(metric_cpp "cuvs::distance::DistanceType::${metric_name_full}") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # compute_distance_to_child_nodes_kernel entrypoint - generate for each combination + set(kernel_name + "compute_distance_to_child_nodes_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(metric "${metric}") + set(metric_cpp "cuvs::distance::DistanceType::${metric_name_full}") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(source_index_type "${cagra_index_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(src_idx_abbrev "${cagra_index_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in" + "${filename}" + @ONLY + ) + 
embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + endforeach() + + # Generate multi_kernel VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to + # team_size and dataset_block_dim VPQ is supported for all data types (float, half, int8_t, + # uint8_t) + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + foreach(pq_len IN LISTS cagra_pq_lens) + # random_pickup_kernel VPQ entrypoint + set(kernel_name + "random_pickup_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(metric "L2Expanded") + set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") + set(metric_name_full "L2Expanded") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "${cagra_pq_bits}") + set(pq_len "${pq_len}") + set(codebook_type "${cagra_codebook_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" 
+ EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # compute_distance_to_child_nodes_kernel VPQ entrypoint + set(kernel_name + "compute_distance_to_child_nodes_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(metric "L2Expanded") + set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") + set(metric_name_full "L2Expanded") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "${cagra_pq_bits}") + set(pq_len "${pq_len}") + set(codebook_type "${cagra_codebook_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(source_index_type "${cagra_index_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(src_idx_abbrev "${cagra_index_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + endforeach() + + # Generate apply_filter_kernel entrypoints apply_filter_kernel doesn't use dataset_descriptor, so + # it only needs index types + set(kernel_name + "apply_filter_kernel_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(source_index_type "${cagra_index_type}") + set(idx_abbrev 
"${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + set(src_idx_abbrev "${cagra_index_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # Generate CAGRA sample filter fragments + set(cagra_filter_configs "filter_none" "filter_bitset") + foreach(filter_name IN LISTS cagra_filter_configs) + set(header_file "neighbors/detail/cagra/jit_lto_kernels/${filter_name}.cuh") + set(kernel_name "${filter_name}_${cagra_index_abbrev}") + set(filename "${generated_kernels_dir}/cagra_filter_device_functions/fatbin_${kernel_name}.cu") + set(source_index_type "${cagra_index_type}") + # Pass both filter_name (for include) and kernel_name (for registration) + set(filter_name_var "${filter_name}") + set(kernel_name_var "${kernel_name}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/filter.cu.in" + "${filename}" @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_filter_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() endfunction() diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp index 7a578a8306..5723cd10c7 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp @@ -26,10 +26,20 @@ struct AlgorithmLauncher { this->call(stream, grid, block, shared_mem, kernel_args); } + template + void dispatch_cooperative( + cudaStream_t stream, dim3 grid, 
dim3 block, std::size_t shared_mem, Args&&... args) + { + void* kernel_args[] = {const_cast(static_cast(&args))...}; + this->call_cooperative(stream, grid, block, shared_mem, kernel_args); + } + cudaKernel_t get_kernel() { return this->kernel; } private: void call(cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** args); + void call_cooperative( + cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** args); cudaKernel_t kernel; }; diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index d095689f5d..66c82ffeb1 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -12,6 +12,17 @@ AlgorithmLauncher::AlgorithmLauncher(cudaKernel_t k) : kernel{k} {} void AlgorithmLauncher::call( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) { + // Debug: verify kernel is being called + if (kernel != nullptr) { + std::cerr << "[JIT] AlgorithmLauncher::call - kernel is not null, launching with grid=(" + << grid.x << "," << grid.y << "," << grid.z << ") block=(" << block.x << "," + << block.y << "," << block.z << ")" << std::endl; + std::cerr.flush(); + } else { + std::cerr << "[JIT] ERROR: AlgorithmLauncher::call - kernel is NULL!" 
<< std::endl; + std::cerr.flush(); + } + cudaLaunchAttribute attribute[1]; attribute[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; attribute[0].val.programmaticStreamSerializationAllowed = 1; @@ -24,6 +35,38 @@ void AlgorithmLauncher::call( config.numAttrs = 1; config.dynamicSmemBytes = shared_mem; + std::cerr << "[JIT] AlgorithmLauncher::call - About to launch kernel" << std::endl; + std::cerr.flush(); + + cudaError_t err = cudaLaunchKernelExC(&config, kernel, kernel_args); + if (err != cudaSuccess) { + std::cerr << "[JIT] ERROR: cudaLaunchKernelExC failed with: " << cudaGetErrorString(err) << " (" + << err << ")" << std::endl; + std::cerr.flush(); + } else { + std::cerr << "[JIT] Kernel launch succeeded" << std::endl; + std::cerr.flush(); + } + RAFT_CUDA_TRY(err); +} + +void AlgorithmLauncher::call_cooperative( + cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) +{ + cudaLaunchAttribute attributes[2]; + attributes[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; + attributes[0].val.programmaticStreamSerializationAllowed = 1; + attributes[1].id = cudaLaunchAttributeCooperative; + attributes[1].val.cooperative = 1; + + cudaLaunchConfig_t config; + config.gridDim = grid; + config.blockDim = block; + config.stream = stream; + config.attrs = attributes; + config.numAttrs = 2; + config.dynamicSmemBytes = shared_mem; + RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); } diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index aa30ffcb27..5b7d708f3b 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -6,6 +6,7 @@ #include "nvjitlink_checker.hpp" #include +#include #include #include #include @@ -24,7 +25,13 @@ void AlgorithmPlanner::add_entrypoint() { + // Look up the entrypoint fragment registered in the fragment database under this->entrypoint. + RAFT_LOG_DEBUG("Looking up JIT entrypoint fragment: %s", this->entrypoint.c_str());
auto entrypoint_fragment = fragment_database().get_fragment(this->entrypoint); + // Fail fast: build() calls add_to() on every queued fragment, so a null entry must never be pushed. + if (entrypoint_fragment == nullptr) { + RAFT_FAIL("JIT entrypoint fragment not found for: %s", this->entrypoint.c_str()); + } this->fragments.push_back(entrypoint_fragment); } @@ -52,6 +63,9 @@ std::shared_ptr AlgorithmPlanner::get_launcher() static std::mutex cache_mutex; std::lock_guard lock(cache_mutex); + std::cerr << "[JIT] AlgorithmPlanner::get_launcher called for entrypoint: " << this->entrypoint + << std::endl; + std::cerr.flush(); if (launchers.count(launch_key) == 0) { add_entrypoint(); add_device_functions(); @@ -61,8 +75,19 @@ std::shared_ptr AlgorithmPlanner::get_launcher() log_message += device_function + ","; } log_message.pop_back(); - RAFT_LOG_INFO("%s", log_message.c_str()); + std::cerr << "[JIT] " << log_message << std::endl; + std::cerr.flush(); + + // Time the first-time JIT compilation + auto start_time = std::chrono::high_resolution_clock::now(); launchers[launch_key] = this->build(); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end_time - start_time); + std::cerr << "[JIT] Compilation completed in " << duration.count() + << " ms for entrypoint: " << this->entrypoint << std::endl; + std::cerr.flush(); + } else { + RAFT_LOG_DEBUG("Using cached JIT launcher for entrypoint: %s", this->entrypoint.c_str()); } return launchers[launch_key]; } @@ -84,6 +109,20 @@ std::shared_ptr AlgorithmPlanner::build() auto result = nvJitLinkCreate(&handle, 3, lopts); check_nvjitlink_result(handle, result); + std::cerr << "[JIT] AlgorithmPlanner::build - Adding " << this->fragments.size() + << " fragments to linker:" << std::endl; + for (size_t i = 0; i < this->fragments.size(); ++i) { + std::cerr << "[JIT] Fragment [" << i << "] pointer: " << (void*)this->fragments[i] +
<< std::endl; + if (i == 0) { + std::cerr << "[JIT] (Entrypoint fragment)" << std::endl; + } else { + std::cerr << "[JIT] (Device function fragment: " << this->device_functions[i - 1] << ")" + << std::endl; + } + } + std::cerr.flush(); + for (auto& frag : this->fragments) { frag->add_to(handle); } @@ -93,10 +132,6 @@ std::shared_ptr AlgorithmPlanner::build() result = nvJitLinkComplete(handle); check_nvjitlink_result(handle, result); - // Dump CUBIN if CUVS_DUMP_CUBIN is set - static int dump_counter = 0; - bool dump_cubin = std::getenv("CUVS_DUMP_CUBIN") != nullptr; - // get cubin from nvJitLink size_t cubin_size; result = nvJitLinkGetLinkedCubinSize(handle, &cubin_size); @@ -115,10 +150,28 @@ std::shared_ptr AlgorithmPlanner::build() RAFT_CUDA_TRY( cudaLibraryLoadData(&library, cubin.get(), nullptr, nullptr, 0, nullptr, nullptr, 0)); + // cudaLibraryEnumerateKernels takes its count by value and cannot report how many kernels + // exist, so query the library directly. Only the entrypoint fragment is expected to contribute + // a __global__ kernel; device-function fragments are expected to carry __device__ code only. + unsigned int kernel_count = 0; + RAFT_CUDA_TRY(cudaLibraryGetKernelCount(&kernel_count, library)); + if (kernel_count == 0) { + RAFT_FAIL("No kernels found in library for entrypoint: %s", this->entrypoint.c_str()); + } + if (kernel_count > 1) { + RAFT_LOG_WARN("Expected 1 kernel for entrypoint %s but library reports %u; using the first", + this->entrypoint.c_str(), + kernel_count); + } constexpr unsigned int count = 1; // NOTE: cudaKernel_t does not need to be freed explicitly std::unique_ptr kernels{new cudaKernel_t[count]};
RAFT_CUDA_TRY(cudaLibraryEnumerateKernels(kernels.get(), count, library)); + // Copy the handle out rather than release()-ing the scratch array, so the array is freed on return. + auto kernel = kernels.get()[0]; + if (kernel == nullptr) { + RAFT_FAIL("Entrypoint kernel is NULL for: %s", this->entrypoint.c_str()); + } - return std::make_shared(kernels.release()[0]); + return std::make_shared(kernel); } diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index 8bae7a7a03..bc4e1aa4cc 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu
@@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include +#include #include #include diff --git a/cpp/src/neighbors/detail/cagra/cagra_search.cuh b/cpp/src/neighbors/detail/cagra/cagra_search.cuh index 2d383a2429..c112732470 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_search.cuh @@ -32,6 +32,7 @@ #include #include +// All includes are done before opening namespace to avoid nested namespace issues namespace cuvs::neighbors::cagra::detail { template ; using init_f = @@ -270,10 +278,21 @@ struct dataset_descriptor_host { }; template - dataset_descriptor_host(const DescriptorImpl& dd_host, InitF init) + dataset_descriptor_host(const DescriptorImpl& dd_host, + InitF init, + cuvs::distance::DistanceType metric_val, + uint32_t dataset_block_dim_val, + bool is_vpq_val = false, + uint32_t pq_bits_val = 0, + uint32_t pq_len_val = 0) : value_{std::make_shared(init, sizeof(DescriptorImpl))}, smem_ws_size_in_bytes{dd_host.smem_ws_size_in_bytes()}, - team_size{dd_host.team_size()} + team_size{dd_host.team_size()}, + metric{metric_val}, + dataset_block_dim{dataset_block_dim_val}, + is_vpq{is_vpq_val}, + pq_bits{pq_bits_val}, + pq_len{pq_len_val} { } diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh index ecb09f516c..c2580ac86c 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -117,12 +117,14 @@ struct standard_dataset_descriptor_t : public dataset_descriptor_base_t uint32_t { return sizeof(standard_dataset_descriptor_t) + raft::round_up_safe(dim, DatasetBlockDim) * sizeof(QUERY_T); } + + private: }; template @@ -169,7 +171,6 @@ _RAFT_DEVICE __noinline__ auto setup_workspace_standard( buf[j] = 0; } } - return const_cast(r); } @@ -259,6 +260,18 @@ RAFT_KERNEL __launch_bounds__(1, 1) using desc_type = standard_dataset_descriptor_t; using base_type = typename desc_type::base_type; +#ifdef CUVS_ENABLE_JIT_LTO + // For JIT, we don't use the function pointers, so set them to nullptr + // The free functions are called directly instead + new (out) desc_type(nullptr, // setup_workspace_impl - not used in JIT + nullptr, // compute_distance_impl - not used in JIT + ptr, + size, + dim, + ld, + dataset_norms); +#else + // For CUDA 12 (non-JIT), set the function pointers properly new (out) desc_type(reinterpret_cast( &setup_workspace_standard), reinterpret_cast( @@ -268,6 +281,7 @@ RAFT_KERNEL __launch_bounds__(1, 1) dim, ld, dataset_norms); +#endif } template * dev_ptr, rmm::cuda_stream_view stream) { + // Use init kernel for both JIT and CUDA 12 + // The kernel handles JIT vs non-JIT via ifdef internally standard_dataset_descriptor_init_kernel <<<1, 1, 0, stream>>>(dev_ptr, ptr, size, dim, ld, dataset_norms); RAFT_CUDA_TRY(cudaPeekAtLastError()); - }}; + }, + Metric, + DatasetBlockDim, + false, // is_vpq + 0, // pq_bits + 0}; // pq_len } } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh index cdafb173ed..4776081531 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -110,7 +110,7 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t uint32_t { /* SMEM workspace layout: @@ -121,6 +121,8 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t(dim, DatasetBlockDim) * sizeof(QUERY_T); } + + private: }; template @@ -375,6 +377,19 @@ RAFT_KERNEL __launch_bounds__(1, 1) IndexT, DistanceT>; using base_type = typename desc_type::base_type; +#ifdef CUVS_ENABLE_JIT_LTO + // For JIT, we don't use the function pointers, so set them to nullptr + // The free functions are called directly instead + new (out) desc_type(nullptr, // setup_workspace_impl - not used in JIT + nullptr, // compute_distance_impl - not used in JIT + encoded_dataset_ptr, + encoded_dataset_dim, + vq_code_book_ptr, + pq_code_book_ptr, + size, + dim); +#else + // For CUDA 12 (non-JIT), set the function pointers properly new (out) desc_type( reinterpret_cast(&setup_workspace_vpq), reinterpret_cast(&compute_distance_vpq), @@ -384,6 +399,7 @@ RAFT_KERNEL __launch_bounds__(1, 1) pq_code_book_ptr, size, dim); +#endif } template ; using base_type = typename desc_type::base_type; - desc_type dd_host{nullptr, - nullptr, - encoded_dataset_ptr, - encoded_dataset_dim, - vq_code_book_ptr, - pq_code_book_ptr, - size, - dim}; - return host_type{dd_host, + return host_type{desc_type{nullptr, + nullptr, + encoded_dataset_ptr, + encoded_dataset_dim, + vq_code_book_ptr, + pq_code_book_ptr, + size, + dim}, [=](dataset_descriptor_base_t* dev_ptr, rmm::cuda_stream_view stream) { + // Use init kernel for both JIT and CUDA 12 + // The kernel handles JIT vs non-JIT via ifdef internally vpq_dataset_descriptor_init_kernel +#include + +namespace cuvs::neighbors::cagra::detail::multi_kernel_search { + +// Instantiate the apply_filter_kernel_jit function with concrete types +template __global__ void apply_filter_kernel_jit<@index_type@, 
@distance_type@, @source_index_type@>( + const @source_index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, const std::uint32_t, const @index_type@, uint32_t*, @source_index_type@, @source_index_type@); + +} // namespace cuvs::neighbors::cagra::detail::multi_kernel_search + +#else + +#include +#include +#include "apply_filter_kernel_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_apply_filter_kernel_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() +{ + registerAlgorithm( + "apply_filter_kernel", + embedded_apply_filter_kernel_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_apply_filter_kernel_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in index 833ad70df5..dbe22e09b4 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in @@ -9,13 +9,15 @@ #include -namespace cuvs::neighbors::cagra::detail::single_cta_search { +namespace cuvs::neighbors::cagra::detail { // Instantiate the compute_distance_standard function for standard descriptor +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; template __device__ @distance_type@ compute_distance_standard<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( - const @data_type@*, uint32_t, @index_type@, uint32_t, uint32_t, uint32_t, const @distance_type@*); + const 
args_t, @index_type@); -} // namespace cuvs::neighbors::cagra::detail::single_cta_search +} // namespace cuvs::neighbors::cagra::detail #else @@ -30,7 +32,7 @@ __attribute__((__constructor__)) static void register_compute_distance_standard_ registerAlgorithm( - "compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@", + "compute_distance_@metric_name@_t@team_size@_dim@dataset_block_dim@", embedded_compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, sizeof(embedded_compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh index 62d8796526..7153596376 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh @@ -12,42 +12,28 @@ #include #include -namespace cuvs::neighbors::cagra::detail::single_cta_search { +namespace cuvs::neighbors::cagra::detail { // Extern function implementation for compute_distance_standard (standard descriptor) +// Returns per-thread distance (team_sum must be called by the caller) template -__device__ DistanceT compute_distance_standard(const DataT* dataset_ptr, - uint32_t smem_ws_ptr, - IndexT dataset_index, - uint32_t dim, - uint32_t ld, - uint32_t team_size_bitshift, - const DistanceT* dataset_norms) +__device__ DistanceT +compute_distance_standard(const typename cuvs::neighbors::cagra::detail:: + dataset_descriptor_base_t::args_t args, + IndexT dataset_index) { - using desc_type = cuvs::neighbors::cagra::detail:: + // Call the free function compute_distance_standard directly with args (already loaded) + // Returns per-thread distance (caller must do team_sum) + using desc_t = 
cuvs::neighbors::cagra::detail:: standard_dataset_descriptor_t; - using base_type = typename desc_type::base_type; - using args_t = typename base_type::args_t; - - // Reconstruct args_t from parameters - args_t args; - args.smem_ws_ptr = smem_ws_ptr; - args.dim = dim; - args.extra_word1 = ld; // dataset_ld - args.extra_ptr1 = (void*)dataset_ptr; // dataset_ptr - args.extra_ptr2 = (void*)dataset_norms; // dataset_norms - - // Call the free function compute_distance_standard - auto per_thread_distances = - cuvs::neighbors::cagra::detail::compute_distance_standard(args, dataset_index); - - // Use team_sum with the provided team_size_bitshift - return device::team_sum(per_thread_distances, team_size_bitshift); + auto per_thread_distance = + cuvs::neighbors::cagra::detail::compute_distance_standard(args, dataset_index); + return per_thread_distance; } -} // namespace cuvs::neighbors::cagra::detail::single_cta_search +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in new file mode 100644 index 0000000000..e7b61a4a7e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -0,0 +1,42 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::multi_kernel_search { + +// Instantiate the compute_distance_to_child_nodes_kernel_jit function with concrete descriptor type +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; +template __global__ void compute_distance_to_child_nodes_kernel_jit( + const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, const desc_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); + +} // namespace cuvs::neighbors::cagra::detail::multi_kernel_search + +#else + +#include +#include +#include "compute_distance_to_child_nodes_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_compute_distance_to_child_nodes_kernel_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() +{ + registerAlgorithm( + "compute_distance_to_child_nodes_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@", + embedded_compute_distance_to_child_nodes_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_compute_distance_to_child_nodes_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in new file mode 100644 index 0000000000..2808bc5900 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in @@ -0,0 +1,43 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::multi_kernel_search { + +// Instantiate the compute_distance_to_child_nodes_kernel_jit function with concrete VPQ descriptor type +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; +template __global__ void compute_distance_to_child_nodes_kernel_jit( + const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, const desc_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); + +} // namespace cuvs::neighbors::cagra::detail::multi_kernel_search + +#else + +#include +#include +#include "compute_distance_to_child_nodes_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_compute_distance_to_child_nodes_kernel_vpq_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() +{ + registerAlgorithm( + "compute_distance_to_child_nodes_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_compute_distance_to_child_nodes_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + 
sizeof(embedded_compute_distance_to_child_nodes_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in index 811dd16ae5..638791181d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in @@ -9,13 +9,15 @@ #include -namespace cuvs::neighbors::cagra::detail::single_cta_search { +namespace cuvs::neighbors::cagra::detail { // Instantiate the compute_distance_vpq function for VPQ descriptor +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; template __device__ @distance_type@ compute_distance_vpq<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( - const uint8_t*, uint32_t, @index_type@, uint32_t, const @codebook_type@*, const @codebook_type@*, uint32_t); + const args_t, @index_type@); -} // namespace cuvs::neighbors::cagra::detail::single_cta_search +} // namespace cuvs::neighbors::cagra::detail #else @@ -29,7 +31,8 @@ __attribute__((__constructor__)) static void register_compute_distance_vpq_@metr { registerAlgorithm( + tag_dist_@dist_abbrev@, + tag_codebook_half>( "compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", embedded_compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, 
sizeof(embedded_compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh index 493d2b884e..700f193c99 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh @@ -12,9 +12,10 @@ #include #include -namespace cuvs::neighbors::cagra::detail::single_cta_search { +namespace cuvs::neighbors::cagra::detail { // Extern function implementation for compute_distance_vpq (VPQ descriptor) +// Returns per-thread distance (team_sum must be called by the caller) template -__device__ DistanceT compute_distance_vpq(const uint8_t* encoded_dataset_ptr, - uint32_t smem_ws_ptr, - IndexT dataset_index, - uint32_t encoded_dataset_dim, - const CodebookT* vq_code_book_ptr, - const CodebookT* pq_code_book_ptr, - uint32_t team_size_bitshift) +__device__ DistanceT +compute_distance_vpq(const typename cuvs::neighbors::cagra::detail:: + dataset_descriptor_base_t::args_t args, + IndexT dataset_index) { - using desc_type = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; - using base_type = typename desc_type::base_type; - using args_t = typename base_type::args_t; - - // Reconstruct args_t from parameters - args_t args; - args.smem_ws_ptr = smem_ws_ptr; - args.dim = encoded_dataset_dim; - args.extra_word1 = encoded_dataset_dim; - args.extra_ptr1 = (void*)encoded_dataset_ptr; - args.extra_ptr2 = (void*)vq_code_book_ptr; - // Note: pq_code_book_ptr is stored in shared memory (copied during setup_workspace_vpq), - // and compute_distance_vpq accesses it via args.smem_ws_ptr, so we don't need to pass it - // separately. 
- - // Call the free function compute_distance_vpq - // It will access the codebook from shared memory via smem_ws_ptr - auto per_thread_distances = - cuvs::neighbors::cagra::detail::compute_distance_vpq(args, dataset_index); - - // Use team_sum with the provided team_size_bitshift - return device::team_sum(per_thread_distances, team_size_bitshift); + // Call the free function compute_distance_vpq directly with args (already loaded) + // Returns per-thread distance (caller must do team_sum) + using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; + auto per_thread_distance = + cuvs::neighbors::cagra::detail::compute_distance_vpq(args, dataset_index); + return per_thread_distance; } -} // namespace cuvs::neighbors::cagra::detail::single_cta_search +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh new file mode 100644 index 0000000000..516df56ca6 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh @@ -0,0 +1,261 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../device_common.hpp" +#include "../hashmap.hpp" +#include "../utils.hpp" +#include "extern_device_functions.cuh" + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail { +namespace device { + +// Helper to check if DescriptorT has kPqBits (VPQ descriptor) +template +struct has_kpq_bits { + template + static auto test(int) -> decltype(U::kPqBits, std::true_type{}); + template + static std::false_type test(...); + static constexpr bool value = decltype(test(0))::value; +}; + +template +inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; + +// JIT version of compute_distance_to_random_nodes - uses extern functions +// Shared between single_cta and multi_cta JIT kernels +template +RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( + IndexT* __restrict__ result_indices_ptr, // [num_pickup] + DistanceT* __restrict__ result_distances_ptr, // [num_pickup] + const DescriptorT* smem_desc, // Concrete descriptor type from template + const uint32_t num_pickup, + const uint32_t num_distilation, + const uint64_t rand_xor_mask, + const IndexT* __restrict__ seed_ptr, // [num_seeds] + const uint32_t num_seeds, + IndexT* __restrict__ visited_hash_ptr, + const uint32_t visited_hash_bitlen, + IndexT* __restrict__ traversed_hash_ptr, + const uint32_t traversed_hash_bitlen, + const uint32_t block_id = 0, + const uint32_t num_blocks = 1) +{ + constexpr unsigned warp_size = 32; + const auto team_size_bits = smem_desc->team_size_bitshift_from_smem(); + const auto max_i = raft::round_up_safe(num_pickup, warp_size >> team_size_bits); + + // Load args once for better performance (avoid repeated loads in the loop) + using args_t = typename cuvs::neighbors::cagra::detail:: + dataset_descriptor_base_t::args_t; + const args_t args = smem_desc->args.load(); + + for (uint32_t i = threadIdx.x >> team_size_bits; i < max_i; i += (blockDim.x >> team_size_bits)) { + const bool 
valid_i = (i < num_pickup); + + IndexT best_index_team_local = raft::upper_bound(); + DistanceT best_norm2_team_local = raft::upper_bound(); + for (uint32_t j = 0; j < num_distilation; j++) { + // Select a node randomly and compute the distance to it + IndexT seed_index = 0; + if (valid_i) { + uint32_t gid = block_id + (num_blocks * (i + (num_pickup * j))); + if (seed_ptr && (gid < num_seeds)) { + seed_index = seed_ptr[gid]; + } else { + seed_index = device::xorshift64(gid ^ rand_xor_mask) % smem_desc->size; + } + } + + // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum + // Otherwise warp shuffles will hang. Each thread calls the extern function to get + // its per-thread distance, then team_sum reduces across all threads in the team. + DistanceT per_thread_norm2 = 0; + if (valid_i) { + if constexpr (!has_kpq_bits_v) { + // Standard descriptor - use the metric from the descriptor type itself + per_thread_norm2 = compute_distance_standard(args, seed_index); + } else { + // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself + per_thread_norm2 = compute_distance_vpq(args, seed_index); + } + } + // Now ALL threads in the team participate in team_sum + const auto norm2_sum = device::team_sum(per_thread_norm2, team_size_bits); + + if (valid_i && (norm2_sum < best_norm2_team_local)) { + best_norm2_team_local = norm2_sum; + best_index_team_local = seed_index; + } + } + + const unsigned lane_id = threadIdx.x & ((1u << team_size_bits) - 1u); + if (valid_i && lane_id == 0) { + if (best_index_team_local != raft::upper_bound()) { + if (hashmap::insert(visited_hash_ptr, visited_hash_bitlen, best_index_team_local) == 0) { + // Deactivate this entry as insertion into visited hash table has failed. 
+ best_norm2_team_local = raft::upper_bound(); + best_index_team_local = raft::upper_bound(); + } else if ((traversed_hash_ptr != nullptr) && + hashmap::search( + traversed_hash_ptr, traversed_hash_bitlen, best_index_team_local)) { + // Deactivate this entry as it has been already used by others. + best_norm2_team_local = raft::upper_bound(); + best_index_team_local = raft::upper_bound(); + } + } + result_distances_ptr[i] = best_norm2_team_local; + result_indices_ptr[i] = best_index_team_local; + // Debug: print first few random node distances + if (i < 3 && block_id == 0) { + printf("JIT random: i=%u idx=%u dist=%.6f\n", + i, + best_index_team_local, + (float)best_norm2_team_local); + } + } + } +} + +// JIT version of compute_distance_to_child_nodes - uses extern functions +// Shared between single_cta and multi_cta JIT kernels +template +RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( + IndexT* __restrict__ result_child_indices_ptr, + DistanceT* __restrict__ result_child_distances_ptr, + const DescriptorT* smem_desc, // Concrete descriptor type from template + const IndexT* __restrict__ knn_graph, + const uint32_t knn_k, + IndexT* __restrict__ visited_hashmap_ptr, + const uint32_t visited_hash_bitlen, + IndexT* __restrict__ traversed_hashmap_ptr, + const uint32_t traversed_hash_bitlen, + const IndexT* __restrict__ parent_indices, + const IndexT* __restrict__ internal_topk_list, + const uint32_t search_width, + int* __restrict__ result_position = nullptr, + const int max_result_position = 0) +{ + constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask::value; + constexpr IndexT invalid_index = ~static_cast(0); + + // Read child indices of parents from knn graph and check if the distance computation is + // necessary. 
+ for (uint32_t i = threadIdx.x; i < knn_k * search_width; i += blockDim.x) { + const IndexT smem_parent_id = parent_indices[i / knn_k]; + IndexT child_id = invalid_index; + if (smem_parent_id != invalid_index) { + const auto parent_id = internal_topk_list[smem_parent_id] & ~index_msb_1_mask; + child_id = knn_graph[(i % knn_k) + (static_cast(knn_k) * parent_id)]; + } + if (child_id != invalid_index) { + if (hashmap::insert(visited_hashmap_ptr, visited_hash_bitlen, child_id) == 0) { + child_id = invalid_index; + } else if ((traversed_hashmap_ptr != nullptr) && + hashmap::search( + traversed_hashmap_ptr, traversed_hash_bitlen, child_id)) { + child_id = invalid_index; + } + } + if (STATIC_RESULT_POSITION) { + result_child_indices_ptr[i] = child_id; + } else if (child_id != invalid_index) { + int j = atomicSub(result_position, 1) - 1; + result_child_indices_ptr[j] = child_id; + } + } + __syncthreads(); + + // Compute the distance to child nodes using extern compute_distance + constexpr unsigned warp_size = 32; + const auto team_size_bits = smem_desc->team_size_bitshift_from_smem(); + const auto num_k = knn_k * search_width; + const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bits); + const bool lead_lane = (threadIdx.x & ((1u << team_size_bits) - 1u)) == 0; + const uint32_t ofst = STATIC_RESULT_POSITION ? 0 : result_position[0]; + + // Load args once for better performance (avoid repeated loads in the loop) + using args_t = typename cuvs::neighbors::cagra::detail:: + dataset_descriptor_base_t::args_t; + const args_t args = smem_desc->args.load(); + + for (uint32_t i = threadIdx.x >> team_size_bits; i < max_i; i += blockDim.x >> team_size_bits) { + const auto j = i + ofst; + const bool valid_i = STATIC_RESULT_POSITION ? (j < num_k) : (j < max_result_position); + const auto child_id = valid_i ? 
result_child_indices_ptr[j] : invalid_index; + + // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum + // Otherwise warp shuffles will hang. Each thread calls the extern function to get + // its per-thread distance, then team_sum reduces across all threads in the team. + DistanceT per_thread_dist = 0; + if (child_id != invalid_index) { + if constexpr (!has_kpq_bits_v) { + // Standard descriptor - use the metric from the descriptor type itself + per_thread_dist = compute_distance_standard(args, child_id); + } else { + // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself + per_thread_dist = compute_distance_vpq(args, child_id); + } + } else { + // Invalid child_id: lead lane gets upper_bound, others get 0 + per_thread_dist = lead_lane ? raft::upper_bound() : 0; + } + + // Now ALL threads in the team participate in team_sum + DistanceT child_dist = device::team_sum(per_thread_dist, team_size_bits); + __syncwarp(); + + // Store the distance + if (valid_i && lead_lane) { + result_child_distances_ptr[j] = child_dist; + // Debug: print first few child node distances + if (j < 3 && threadIdx.x < 32) { + printf("JIT child: j=%u idx=%u dist=%.6f\n", j, child_id, (float)child_dist); + } + } + } +} + +} // namespace device +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh new file mode 100644 index 0000000000..3437f20d3b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -0,0 +1,121 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance.hpp" +#include + +// Forward declarations of descriptor types (full definitions are in -impl.cuh files) +// This file is only included by JIT kernel headers which are included by .cu.in files +// The .cu.in files include the -impl.cuh files directly for full type definitions +namespace cuvs::neighbors::cagra::detail { +template +struct standard_dataset_descriptor_t; + +template +struct cagra_q_dataset_descriptor_t; +} // namespace cuvs::neighbors::cagra::detail + +namespace cuvs::neighbors::cagra::detail { + +// All extern function declarations are in the cuvs::neighbors::cagra::detail namespace +// so they can be used by all search modes without being beholden to any specific sub-namespace + +// Standard descriptor extern functions +template +extern __device__ const + standard_dataset_descriptor_t* + setup_workspace_standard(const standard_dataset_descriptor_t* desc, + void* smem, + const DataT* queries, + uint32_t query_id); + +template +extern __device__ DistanceT compute_distance_standard( + const typename dataset_descriptor_base_t::args_t args, + IndexT dataset_index); + +// VPQ descriptor extern functions +template +extern __device__ const cagra_q_dataset_descriptor_t* +setup_workspace_vpq(const cagra_q_dataset_descriptor_t* desc, + void* smem, + const DataT* queries, + uint32_t query_id); + +template +extern __device__ DistanceT compute_distance_vpq( + const typename dataset_descriptor_base_t::args_t args, + IndexT dataset_index); + +// Sample filter extern function - linked separately via JIT LTO +// Takes 3 params: query_id, node_id, and filter_data (void* pointer to filter-specific data) +// For none_filter: filter_data can be nullptr +// For bitset_filter: filter_data points to bitset_filter_data_t struct +template +extern __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* filter_data); + +} // namespace cuvs::neighbors::cagra::detail 
diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter.cu.in new file mode 100644 index 0000000000..a287f10f12 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter.cu.in @@ -0,0 +1,32 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the sample_filter device function template +template __device__ bool sample_filter<@source_index_type@>(uint32_t, @source_index_type@, void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include "@kernel_name_var@.h" + +__attribute__((__constructor__)) static void register_@kernel_name_var@() +{ + registerAlgorithm( + "sample_filter_@filter_name_var@", + embedded_@kernel_name_var@, + sizeof(embedded_@kernel_name_var@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh new file mode 100644 index 0000000000..3440b8d54d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh @@ -0,0 +1,36 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../../../sample_filter.cuh" +#include "filter_data.h" +#include + +namespace cuvs::neighbors::cagra::detail { + +template +__device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* filter_data) +{ + // bitset_filter checks if the node_id is in the bitset + // filter_data points to bitset_filter_data_t struct + if (filter_data == nullptr) { + return true; // No filter data, allow all + } + + auto* bitset_data = static_cast*>(filter_data); + if (bitset_data->bitset_ptr == nullptr) { + return true; // No bitset provided, allow all + } + + // Create bitset_view and filter, matching non-JIT behavior + auto bitset_view = raft::core::bitset_view{ + bitset_data->bitset_ptr, bitset_data->bitset_len, bitset_data->original_nbits}; + auto bitset_filter = + cuvs::neighbors::filtering::bitset_filter{bitset_view}; + return bitset_filter(query_id, node_id); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h new file mode 100644 index 0000000000..b671eda513 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h @@ -0,0 +1,27 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include + +namespace cuvs::neighbors::cagra::detail { + +// Structure to hold bitset filter data +// This is passed as void* to the extern sample_filter function +template +struct bitset_filter_data_t { + uint32_t* bitset_ptr; // Pointer to bitset data in global memory + SourceIndexT bitset_len; // Length of bitset array + SourceIndexT original_nbits; // Original number of bits + + __device__ bitset_filter_data_t(uint32_t* ptr, SourceIndexT len, SourceIndexT nbits) + : bitset_ptr(ptr), bitset_len(len), original_nbits(nbits) + { + } +}; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_none.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_none.cuh new file mode 100644 index 0000000000..18adbaaa58 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_none.cuh @@ -0,0 +1,20 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../../../sample_filter.cuh" + +namespace cuvs::neighbors::cagra::detail { + +template +__device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* filter_data) +{ + // none_sample_filter always returns true (no filtering) + // filter_data is ignored (can be nullptr) + return true; +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in new file mode 100644 index 0000000000..f00927694c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -0,0 +1,42 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::multi_kernel_search { + +// Instantiate the random_pickup_kernel_jit function with concrete descriptor type +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; +template __global__ void random_pickup_kernel_jit( + const desc_t*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); + +} // namespace cuvs::neighbors::cagra::detail::multi_kernel_search + +#else + +#include +#include +#include "random_pickup_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_random_pickup_kernel_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "random_pickup_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@", + embedded_random_pickup_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_random_pickup_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in new file mode 100644 index 0000000000..8f034a2bce --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in @@ -0,0 +1,43 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+
+// This template is compiled in two modes, selected by BUILD_KERNEL:
+//  - defined:     device side, explicitly instantiates random_pickup_kernel_jit (VPQ descriptor)
+//  - not defined: host side, registers the embedded_* array from the generated header
+#ifdef BUILD_KERNEL
+
+#include
+#include
+#include
+
+namespace cuvs::neighbors::cagra::detail::multi_kernel_search {
+
+// Instantiate the random_pickup_kernel_jit function with concrete VPQ descriptor type
+using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t;
+// The parameter list below must stay in sync with the random_pickup_kernel_jit definition.
+template __global__ void random_pickup_kernel_jit(
+  const desc_t*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t);
+
+}  // namespace cuvs::neighbors::cagra::detail::multi_kernel_search
+
+#else
+
+#include
+#include
+#include "random_pickup_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h"
+
+using namespace cuvs::neighbors::cagra::detail;
+
+// Runs automatically before main() (constructor attribute) and registers the embedded array and
+// its byte size under the kernel's lookup name, which for VPQ also encodes pq_bits and pq_len.
+__attribute__((__constructor__)) static void register_random_pickup_kernel_vpq_@type_abbrev@_@idx_abbrev@_@dist_abbrev@()
+{
+  registerAlgorithm(
+    "random_pickup_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd",
+    embedded_random_pickup_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@,
+    sizeof(embedded_random_pickup_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@));
+}
+
+#endif
diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_helpers.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_helpers.cuh
new file mode 100644
index 0000000000..fe985f7275
--- /dev/null
+++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_helpers.cuh
@@ -0,0 +1,138 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace cuvs::neighbors::cagra::detail::multi_cta_search {
+
+// Pick the next parent node for this CTA out of a 64-entry (2 x 32) index/distance buffer.
+//
+// Only warp 0 does work: threads with threadIdx.x >= 32 return at once. The warp intrinsics
+// below use the full 0xffffffff mask, so all 32 lanes of warp 0 must enter together.
+//
+// An entry is a candidate when it is neither invalid_index (all bits set) nor already marked
+// as used (most significant bit set). Candidates are tried in lane order; the first one that
+// can be inserted into the hash table at hash_ptr becomes the parent:
+//   - its MSB is set in itopk_indices,
+//   - its position is written to next_parent_indices[0],
+//   - if it was found in the second half (i >= 32) it is first moved into slot j, the highest
+//     invalid slot observed in the first half.
+// Candidates whose insertion fails are overwritten with invalid_index / max distance.
+// next_parent_indices[0] is left as invalid_index when no parent is found, including the case
+// where the first half has no invalid slot to move a second-half candidate into.
+template
+RAFT_DEVICE_INLINE_FUNCTION void pickup_next_parent(
+  INDEX_T* const next_parent_indices,
+  INDEX_T* const itopk_indices,       // [itopk_size * 2]
+  DISTANCE_T* const itopk_distances,  // [itopk_size * 2]
+  INDEX_T* const hash_ptr,
+  const uint32_t hash_bitlen)
+{
+  constexpr uint32_t itopk_size      = 32;
+  constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value;
+  constexpr INDEX_T invalid_index    = ~static_cast(0);
+
+  const unsigned warp_id = threadIdx.x / 32;
+  if (warp_id > 0) { return; }
+  if (threadIdx.x == 0) { next_parent_indices[0] = invalid_index; }
+  __syncwarp();
+
+  int j = -1;
+  for (unsigned i = threadIdx.x; i < itopk_size * 2; i += 32) {
+    INDEX_T index    = itopk_indices[i];
+    int is_invalid   = 0;
+    int is_candidate = 0;
+    if (index == invalid_index) {
+      is_invalid = 1;
+    } else if (index & index_msb_1_mask) {
+      // Already used as a parent: neither invalid nor a candidate.
+    } else {
+      is_candidate = 1;
+    }
+
+    const auto ballot_mask = __ballot_sync(0xffffffff, is_candidate);
+    // Rank of this lane among the candidate lanes. The shift uses an unsigned literal:
+    // with a signed `1`, lane 31 would shift into the sign bit and then subtract one from
+    // the most negative int, which is signed overflow.
+    const auto candidate_id  = __popc(ballot_mask & ((1u << threadIdx.x) - 1u));
+    const int num_candidates = __popc(ballot_mask);
+    for (int k = 0; k < num_candidates; k++) {
+      int flag_done = 0;
+      if (is_candidate && candidate_id == k) {
+        is_candidate = 0;
+        if (hashmap::insert(hash_ptr, hash_bitlen, index)) {
+          // Use this candidate as next parent
+          index |= index_msb_1_mask;  // set most significant bit as used node
+          if (i < itopk_size) {
+            next_parent_indices[0] = i;
+            itopk_indices[i]       = index;
+          } else {
+            next_parent_indices[0] = j;
+            // Move the next parent node from i-th position to j-th position
+            itopk_indices[j]   = index;
+            itopk_distances[j] = itopk_distances[i];
+            itopk_indices[i]   = invalid_index;
+            itopk_distances[i] = utils::get_max_value();
+          }
+          flag_done = 1;
+        } else {
+          // Deactivate the node since it has been used by other CTA.
+          itopk_indices[i]   = invalid_index;
+          itopk_distances[i] = utils::get_max_value();
+          is_invalid         = 1;
+        }
+      }
+      // Warp-uniform exit: every lane leaves as soon as any lane has claimed a parent.
+      if (__any_sync(0xffffffff, (flag_done > 0))) { return; }
+    }
+    if (i < itopk_size) {
+      // Highest lane holding an invalid entry in the first half; -1 when there is none.
+      j = 31 - __clz(__ballot_sync(0xffffffff, is_invalid));
+      if (j < 0) { return; }
+    }
+  }
+}
+
+// Sort up to MAX_ELEMENTS (distance, index) pairs in place using warp 0 only.
+//
+// Threads outside warp 0 return immediately. Each lane loads N = ceil(MAX_ELEMENTS / warp_size)
+// elements with a warp-strided pattern (lane_id + warp_size * i), pads slots at or beyond
+// num_elements with max distance / all-ones index, calls bitonic::warp_sort, and stores its N
+// results back to the contiguous range starting at N * lane_id.
+template
+RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort(float* distances,  // [num_elements]
+                                                      INDEX_T* indices,  // [num_elements]
+                                                      const uint32_t num_elements)
+{
+  const unsigned warp_id = threadIdx.x / raft::warp_size();
+  if (warp_id > 0) { return; }
+  const unsigned lane_id = threadIdx.x % raft::warp_size();
+  constexpr unsigned N   = (MAX_ELEMENTS + (raft::warp_size() - 1)) / raft::warp_size();
+  float key[N];
+  INDEX_T val[N];
+  for (unsigned i = 0; i < N; i++) {
+    unsigned j = lane_id + (raft::warp_size() * i);
+    if (j < num_elements) {
+      key[i] = distances[j];
+      val[i] = indices[j];
+    } else {
+      key[i] = utils::get_max_value();
+      val[i] = ~static_cast(0);
+    }
+  }
+  /* Warp Sort */
+  bitonic::warp_sort(key, val);
+  /* Store sorted results */
+  for (unsigned i = 0; i < N; i++) {
+    unsigned j = (N * lane_id) + i;
+    if (j < num_elements) {
+      distances[j] = key[i];
+      indices[j]   = val[i];
+    }
+  }
+}
+
+// Non-template wrapper around topk_by_bitonic_sort<64, uint32_t>.
+RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_wrapper_64(
+  float* distances,   // [num_elements]
+  uint32_t* indices,  // [num_elements]
+  const uint32_t num_elements)
+{
+  topk_by_bitonic_sort<64, uint32_t>(distances, indices, num_elements);
+}
+
+// Non-template wrapper around topk_by_bitonic_sort<128, uint32_t>.
+RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_wrapper_128(
+  float* distances,   // [num_elements]
+  uint32_t* indices,  // [num_elements]
+  const uint32_t num_elements)
+{
+  topk_by_bitonic_sort<128, uint32_t>(distances, indices, num_elements);
+}
+
+// Non-template wrapper around topk_by_bitonic_sort<256, uint32_t>.
+RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_wrapper_256(
+  float* distances,   // [num_elements]
+  uint32_t* indices,  // [num_elements]
+  const uint32_t num_elements)
+{
+  topk_by_bitonic_sort<256, uint32_t>(distances, indices, num_elements);
+}
+
+}  //
namespace cuvs::neighbors::cagra::detail::multi_cta_search
diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in
new file mode 100644
index 0000000000..22ebdc1109
--- /dev/null
+++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in
@@ -0,0 +1,50 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// This file is auto-generated. Do not edit manually.
+
+// This template is compiled in two modes, selected by BUILD_KERNEL:
+//  - defined:     device side, explicitly instantiates search_kernel_jit
+//  - not defined: host side, registers the embedded_* array from the generated header
+#ifdef BUILD_KERNEL
+
+// Include helpers first so they're available when the JIT kernel is instantiated
+#include  // For pickup_next_parent and topk_by_bitonic_sort_wrapper_*
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace cuvs::neighbors::cagra::detail::multi_cta_search {
+
+// Instantiate the search_kernel_jit function with concrete descriptor type
+using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t;
+// The parameter list and the __launch_bounds__(1024, 1) qualifier below must stay in sync
+// with the search_kernel_jit definition.
+template __global__ __launch_bounds__(1024, 1) void search_kernel_jit(
+  @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, uint32_t*, @source_index_type@, @source_index_type@);
+
+}  // namespace cuvs::neighbors::cagra::detail::multi_cta_search
+
+#else
+
+#include
+#include
+#include "search_multi_cta_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h"
+
+using namespace cuvs::neighbors::cagra::detail;
+
+// Runs automatically before main() (constructor attribute) and registers the embedded array and
+// its byte size under the kernel's lookup name.
+__attribute__((__constructor__)) static void
register_search_multi_cta_kernel_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@()
+{
+  registerAlgorithm(
+    "search_multi_cta_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@",
+    embedded_search_multi_cta_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@,
+    sizeof(embedded_search_multi_cta_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@));
+}
+
+#endif
diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh
new file mode 100644
index 0000000000..8386a1d41a
--- /dev/null
+++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh
@@ -0,0 +1,451 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "../compute_distance-ext.cuh"
+#include "../device_common.hpp"
+#include "../hashmap.hpp"
+#include "../utils.hpp"
+// Note:
+// - Extern function declarations (setup_workspace_standard, compute_distance_standard, etc.) use
+//   types from compute_distance-ext.cuh
+// - Type definitions (standard_dataset_descriptor_t, etc.)
are in the -impl.cuh files, included by
+//   the .cu.in files for template instantiation
+// - pickup_next_parent and topk_by_bitonic_sort_wrapper_* are included via
+//   search_multi_cta_helpers.cuh in the .cu.in file
+
+#include  // For DistanceType enum
+#include  // For raft::upper_bound
+#include  // For raft::round_up_safe
+
+#include
+#include
+#include  // For std::is_same_v, std::true_type, std::false_type
+
+#ifdef _CLK_BREAKDOWN
+#include  // For printf in debug code
+#endif
+
+// Include extern function declarations before namespace so they're available to kernel definitions
+#include "extern_device_functions.cuh"
+#include "filter_data.h"
+// Include shared JIT device functions before namespace so they're available to kernel definitions
+#include "device_common_jit.cuh"
+
+namespace cuvs::neighbors::cagra::detail::multi_cta_search {
+
+// Helper to check if DescriptorT has kPqBits (VPQ descriptor) - use shared version
+// Use fully qualified name since it's a template variable
+using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v;
+
+// sample_filter is declared in extern_device_functions.cuh
+
+// JIT versions of compute_distance_to_random_nodes and compute_distance_to_child_nodes
+// are now shared in device_common_jit.cuh - use fully qualified names
+using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit;
+using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit;
+
+// JIT version of search_kernel - uses extern functions with concrete descriptor type.
+// The sample filter is not a template argument: the kernel calls the extern sample_filter
+// function, whose implementation is linked in separately via JIT LTO.
+//
+// Launch layout, as read by the kernel:
+//   gridDim.x = number of CTAs working on one query, gridDim.y = number of queries
+//   blockDim.x <= 1024 (__launch_bounds__); warp 0 does the sort, parent pick-up and output
+// Dynamic shared memory, in order: the descriptor workspace
+// (DescriptorT::get_smem_ws_size_in_bytes(dim) bytes), the result indices and result distances
+// (result_buffer_size_32 entries each), the visited hashmap, one parent index and one
+// result-position counter.
+// Preconditions asserted in the body: round_up(itopk_size + graph_degree, 32) <= max_elements
+// and max_elements <= 256.
+template
+RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_jit(
+  IndexT* const result_indices_ptr,       // [num_queries, num_cta_per_query, itopk_size]
+  DistanceT* const result_distances_ptr,  // [num_queries, num_cta_per_query, itopk_size]
+  const DescriptorT* dataset_desc,        // Concrete descriptor type from template
+  const DataT* const queries_ptr,         // [num_queries, dataset_dim]
+  const IndexT* const knn_graph,          // [dataset_size, graph_degree]
+  const uint32_t max_elements,
+  const uint32_t graph_degree,
+  const SourceIndexT* source_indices_ptr,  // indexed by node id; nullptr means identity mapping
+  const unsigned num_distilation,
+  const uint64_t rand_xor_mask,
+  const IndexT* seed_ptr,  // [num_queries, num_seeds]
+  const uint32_t num_seeds,
+  const uint32_t visited_hash_bitlen,
+  IndexT* const traversed_hashmap_ptr,  // [num_queries, 1 << traversed_hash_bitlen]
+  const uint32_t traversed_hash_bitlen,
+  const uint32_t itopk_size,
+  const uint32_t min_iteration,
+  const uint32_t max_iteration,
+  uint32_t* const num_executed_iterations, /* stats */
+  uint32_t* bitset_ptr,                    // Bitset data pointer (nullptr for none_filter)
+  SourceIndexT bitset_len,                 // Bitset length
+  SourceIndexT original_nbits)             // Original number of bits
+{
+  using DATA_T     = DataT;
+  using INDEX_T    = IndexT;
+  using DISTANCE_T = DistanceT;
+
+  // Translate an internal node id into the id handed to sample_filter.
+  auto to_source_index = [source_indices_ptr](INDEX_T x) {
+    return source_indices_ptr == nullptr ? static_cast(x) : source_indices_ptr[x];
+  };
+
+  const auto num_queries       = gridDim.y;
+  const auto query_id          = blockIdx.y;
+  const auto num_cta_per_query = gridDim.x;
+  const auto cta_id            = blockIdx.x;  // local CTA ID
+
+#ifdef _CLK_BREAKDOWN
+  uint64_t clk_init                 = 0;
+  uint64_t clk_compute_1st_distance = 0;
+  uint64_t clk_topk                 = 0;
+  uint64_t clk_pickup_parents       = 0;
+  uint64_t clk_compute_distance     = 0;
+  uint64_t clk_start;
+#define _CLK_START() clk_start = clock64()
+#define _CLK_REC(V)  V += clock64() - clk_start;
+#else
+#define _CLK_START()
+#define _CLK_REC(V)
+#endif
+  _CLK_START();
+
+  extern __shared__ uint8_t smem[];
+
+  // Layout of result_buffer
+  // +----------------+---------+---------------------------+
+  // | internal_top_k | padding | neighbors of parent nodes |
+  // |  (itopk_size)  | upto 32 |      (graph_degree)       |
+  // +----------------+---------+---------------------------+
+  // |<---            result_buffer_size_32              --->|
+  const auto result_buffer_size    = itopk_size + graph_degree;
+  const auto result_buffer_size_32 = raft::round_up_safe(result_buffer_size, 32);
+  assert(result_buffer_size_32 <= max_elements);
+
+  // Get smem_ws_size_in_bytes using static method (dim is in descriptor args)
+  uint32_t dim                   = dataset_desc->args.dim;
+  uint32_t smem_ws_size_in_bytes = DescriptorT::get_smem_ws_size_in_bytes(dim);
+
+  // Set smem working buffer for the distance calculation using extern function
+  // setup_workspace copies the descriptor to shared memory and returns pointer to smem descriptor
+  // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads())
+  const DescriptorT* smem_desc = nullptr;
+  // Check if DescriptorT is a standard_dataset_descriptor_t by checking if it doesn't have kPqBits
+  // (standard descriptors don't have kPqBits, VPQ descriptors do)
+  if constexpr (!has_kpq_bits_v) {
+    // Standard descriptor - use the metric from the descriptor type itself
+    smem_desc = setup_workspace_standard(dataset_desc, smem, queries_ptr, query_id);
+  } else {
+    // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself
+    smem_desc = setup_workspace_vpq(dataset_desc, smem, queries_ptr, query_id);
+  }
+
+  // Carve the remaining dynamic shared memory into the per-CTA buffers.
+  auto* __restrict__ result_indices_buffer =
+    reinterpret_cast(smem + smem_ws_size_in_bytes);
+  auto* __restrict__ result_distances_buffer =
+    reinterpret_cast(result_indices_buffer + result_buffer_size_32);
+  auto* __restrict__ local_visited_hashmap_ptr =
+    reinterpret_cast(result_distances_buffer + result_buffer_size_32);
+  auto* __restrict__ parent_indices_buffer =
+    reinterpret_cast(local_visited_hashmap_ptr + hashmap::get_size(visited_hash_bitlen));
+  auto* __restrict__ result_position = reinterpret_cast(parent_indices_buffer + 1);
+
+  // The traversed hashmap lives in global memory and is shared by all CTAs of one query.
+  INDEX_T* const local_traversed_hashmap_ptr =
+    traversed_hashmap_ptr + (hashmap::get_size(traversed_hash_bitlen) * query_id);
+
+  constexpr INDEX_T invalid_index    = ~static_cast(0);
+  constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value;
+
+  for (unsigned i = threadIdx.x; i < result_buffer_size_32; i += blockDim.x) {
+    result_indices_buffer[i]   = invalid_index;
+    result_distances_buffer[i] = utils::get_max_value();
+  }
+  hashmap::init(local_visited_hashmap_ptr, visited_hash_bitlen);
+  __syncthreads();
+  _CLK_REC(clk_init);
+
+  // compute distance to randomly selected nodes using JIT version
+  _CLK_START();
+  const INDEX_T* const local_seed_ptr = seed_ptr ? seed_ptr + (num_seeds * query_id) : nullptr;
+  uint32_t block_id                   = cta_id + (num_cta_per_query * query_id);
+  uint32_t num_blocks                 = num_cta_per_query * num_queries;
+
+  compute_distance_to_random_nodes_jit(
+    result_indices_buffer,
+    result_distances_buffer,
+    smem_desc,
+    graph_degree,
+    num_distilation,
+    rand_xor_mask,
+    local_seed_ptr,
+    num_seeds,
+    local_visited_hashmap_ptr,
+    visited_hash_bitlen,
+    local_traversed_hashmap_ptr,
+    traversed_hash_bitlen,
+    block_id,
+    num_blocks);
+  __syncthreads();
+  _CLK_REC(clk_compute_1st_distance);
+
+  uint32_t iter = 0;
+  while (1) {
+    _CLK_START();
+    if (threadIdx.x < 32) {
+      // [1st warp] Topk with bitonic sort
+      if constexpr (std::is_same_v) {
+        // use a non-template wrapper function to avoid pre-inlining the topk_by_bitonic_sort
+        // function (vs post-inlining, this impacts register pressure)
+        if (max_elements <= 64) {
+          topk_by_bitonic_sort_wrapper_64(
+            result_distances_buffer, result_indices_buffer, result_buffer_size_32);
+        } else if (max_elements <= 128) {
+          topk_by_bitonic_sort_wrapper_128(
+            result_distances_buffer, result_indices_buffer, result_buffer_size_32);
+        } else {
+          assert(max_elements <= 256);
+          topk_by_bitonic_sort_wrapper_256(
+            result_distances_buffer, result_indices_buffer, result_buffer_size_32);
+        }
+      } else {
+        if (max_elements <= 64) {
+          topk_by_bitonic_sort<64, INDEX_T>(
+            result_distances_buffer, result_indices_buffer, result_buffer_size_32);
+        } else if (max_elements <= 128) {
+          topk_by_bitonic_sort<128, INDEX_T>(
+            result_distances_buffer, result_indices_buffer, result_buffer_size_32);
+        } else {
+          assert(max_elements <= 256);
+          topk_by_bitonic_sort<256, INDEX_T>(
+            result_distances_buffer, result_indices_buffer, result_buffer_size_32);
+        }
+      }
+    }
+    __syncthreads();
+    _CLK_REC(clk_topk);
+
+    if (iter + 1 >= max_iteration) { break; }
+
+    _CLK_START();
+    if (threadIdx.x < 32) {
+      // [1st warp] Pick up a next parent
+      pickup_next_parent(parent_indices_buffer,
+                         result_indices_buffer,
+                         result_distances_buffer,
+                         local_traversed_hashmap_ptr,
+                         traversed_hash_bitlen);
+    } else {
+      // [Other warps] Reset visited hashmap
+      hashmap::init(local_visited_hashmap_ptr, visited_hash_bitlen, 32);
+    }
+    __syncthreads();
+    _CLK_REC(clk_pickup_parents);
+
+    if ((parent_indices_buffer[0] == invalid_index) && (iter >= min_iteration)) { break; }
+
+    _CLK_START();
+    for (unsigned i = threadIdx.x; i < result_buffer_size_32; i += blockDim.x) {
+      INDEX_T index = result_indices_buffer[i];
+      if (index == invalid_index) { continue; }
+      if ((i >= itopk_size) && (index & index_msb_1_mask)) {
+        // Remove nodes kicked out of the itopk list from the traversed hash table.
+        hashmap::remove(
+          local_traversed_hashmap_ptr, traversed_hash_bitlen, index & ~index_msb_1_mask);
+        result_indices_buffer[i]   = invalid_index;
+        result_distances_buffer[i] = utils::get_max_value();
+      } else {
+        // Restore visited hashmap by putting nodes on result buffer in it.
+        index &= ~index_msb_1_mask;
+        hashmap::insert(local_visited_hashmap_ptr, visited_hash_bitlen, index);
+      }
+    }
+    // Initialize buffer for compute_distance_to_child_nodes.
+    if (threadIdx.x == blockDim.x - 1) { result_position[0] = result_buffer_size_32; }
+    __syncthreads();
+
+    // Compute the norms between child nodes and query node using JIT version
+    compute_distance_to_child_nodes_jit(
+      result_indices_buffer,
+      result_distances_buffer,
+      smem_desc,
+      knn_graph,
+      graph_degree,
+      local_visited_hashmap_ptr,
+      visited_hash_bitlen,
+      local_traversed_hashmap_ptr,
+      traversed_hash_bitlen,
+      parent_indices_buffer,
+      result_indices_buffer,
+      1,
+      result_position,
+      result_buffer_size_32);
+    __syncthreads();
+
+    // Check the state of the nodes in the result buffer which were not updated
+    // by the compute_distance_to_child_nodes above, and if it cannot be used as
+    // a parent node, it is deactivated.
+    for (uint32_t i = threadIdx.x; i < result_position[0]; i += blockDim.x) {
+      INDEX_T index = result_indices_buffer[i];
+      if (index == invalid_index || index & index_msb_1_mask) { continue; }
+      if (hashmap::search(local_traversed_hashmap_ptr, traversed_hash_bitlen, index)) {
+        result_indices_buffer[i]   = invalid_index;
+        result_distances_buffer[i] = utils::get_max_value();
+      }
+    }
+    __syncthreads();
+    _CLK_REC(clk_compute_distance);
+
+    // Filtering - use extern sample_filter function (linked via JIT LTO)
+    for (unsigned p = threadIdx.x; p < 1; p += blockDim.x) {
+      if (parent_indices_buffer[p] != invalid_index) {
+        const auto parent_id = result_indices_buffer[parent_indices_buffer[p]] & ~index_msb_1_mask;
+        // Construct filter_data struct (bitset data is in global memory)
+        cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data(
+          bitset_ptr, bitset_len, original_nbits);
+        if (!sample_filter(query_id,
+                           to_source_index(parent_id),
+                           bitset_ptr != nullptr ? &filter_data : nullptr)) {
+          // If the parent must not be in the resulting top-k list, remove from the parent list
+          result_distances_buffer[parent_indices_buffer[p]] = utils::get_max_value();
+          result_indices_buffer[parent_indices_buffer[p]]   = invalid_index;
+        }
+      }
+    }
+    __syncthreads();
+
+    iter++;
+  }
+
+  // Filtering - use extern sample_filter function (linked via JIT LTO)
+  for (uint32_t i = threadIdx.x; i < result_buffer_size_32; i += blockDim.x) {
+    INDEX_T index = result_indices_buffer[i];
+    if (index == invalid_index) { continue; }
+    index &= ~index_msb_1_mask;
+    // Construct filter_data struct (bitset data is in global memory)
+    cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data(
+      bitset_ptr, bitset_len, original_nbits);
+    if (!sample_filter(
+          query_id, to_source_index(index), bitset_ptr != nullptr ? &filter_data : nullptr)) {
+      result_indices_buffer[i]   = invalid_index;
+      result_distances_buffer[i] = utils::get_max_value();
+    }
+  }
+  __syncthreads();
+
+  // Output search results (1st warp only).
+  if (threadIdx.x < 32) {
+    uint32_t offset = 0;
+    for (uint32_t i = threadIdx.x; i < result_buffer_size_32; i += 32) {
+      INDEX_T index = result_indices_buffer[i];
+      bool is_valid = false;
+      if (index != invalid_index) {
+        if (index & index_msb_1_mask) {
+          is_valid = true;
+          index &= ~index_msb_1_mask;
+        } else if ((offset < itopk_size) &&
+                   hashmap::insert(
+                     local_traversed_hashmap_ptr, traversed_hash_bitlen, index)) {
+          // If a node that is not used as a parent can be inserted into
+          // the traversed hash table, it is considered a valid result.
+          is_valid = true;
+        }
+      }
+      const auto mask = __ballot_sync(0xffffffff, is_valid);
+      if (is_valid) {
+        // Output slot = results emitted so far + rank of this lane among the valid lanes.
+        // The shift uses an unsigned literal so that lane 31 does not overflow a signed int.
+        const auto j = offset + __popc(mask & ((1u << threadIdx.x) - 1u));
+        if (j < itopk_size) {
+          uint32_t k            = j + (itopk_size * (cta_id + (num_cta_per_query * query_id)));
+          result_indices_ptr[k] = index & ~index_msb_1_mask;
+          if (result_distances_ptr != nullptr) {
+            result_distances_ptr[k] = result_distances_buffer[i];
+          }
+        } else {
+          // If it is valid and registered in the traversed hash table but is
+          // not output as a result, it is removed from the hash table.
+          hashmap::remove(local_traversed_hashmap_ptr, traversed_hash_bitlen, index);
+        }
+      }
+      offset += __popc(mask);
+    }
+    // If the number of outputs is insufficient, fill in with invalid results.
+    for (uint32_t i = offset + threadIdx.x; i < itopk_size; i += 32) {
+      uint32_t k            = i + (itopk_size * (cta_id + (num_cta_per_query * query_id)));
+      result_indices_ptr[k] = invalid_index;
+      if (result_distances_ptr != nullptr) {
+        result_distances_ptr[k] = utils::get_max_value();
+      }
+    }
+  }
+
+  if (threadIdx.x == 0 && cta_id == 0 && num_executed_iterations != nullptr) {
+    num_executed_iterations[query_id] = iter + 1;
+  }
+
+#ifdef _CLK_BREAKDOWN
+  if ((threadIdx.x == 0 || threadIdx.x == blockDim.x - 1) && (blockIdx.x == 0) &&
+      ((query_id * 3) % gridDim.y < 3)) {
+    printf(
+      "%s:%d "
+      "query, %d, thread, %d"
+      ", init, %lu"
+      ", 1st_distance, %lu"
+      ", topk, %lu"
+      ", pickup_parents, %lu"
+      ", distance, %lu"
+      "\n",
+      __FILE__,
+      __LINE__,
+      query_id,
+      threadIdx.x,
+      clk_init,
+      clk_compute_1st_distance,
+      clk_topk,
+      clk_pickup_parents,
+      clk_compute_distance);
+  }
+#endif
+}
+
+}  // namespace cuvs::neighbors::cagra::detail::multi_cta_search
diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in
new file mode 100644
index 0000000000..0de95f75c8
--- /dev/null
+++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in
@@ -0,0 +1,51 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// This file is auto-generated. Do not edit manually.
+
+// This template is compiled in two modes, selected by BUILD_KERNEL:
+//  - defined:     device side, explicitly instantiates search_kernel_jit (VPQ descriptor)
+//  - not defined: host side, registers the embedded_* array from the generated header
+#ifdef BUILD_KERNEL
+
+// Include helpers first so they're available when the JIT kernel is instantiated
+#include  // For pickup_next_parent and topk_by_bitonic_sort_wrapper_*
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace cuvs::neighbors::cagra::detail::multi_cta_search {
+
+// Instantiate the search_kernel_jit function with concrete VPQ descriptor type
+using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t;
+// The parameter list and the __launch_bounds__(1024, 1) qualifier below must stay in sync
+// with the search_kernel_jit definition.
+template __global__ __launch_bounds__(1024, 1) void search_kernel_jit(
+  @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, uint32_t*, @source_index_type@, @source_index_type@);
+
+}  // namespace cuvs::neighbors::cagra::detail::multi_cta_search
+
+#else
+
+#include
+#include
+#include "search_multi_cta_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h"
+
+using namespace cuvs::neighbors::cagra::detail;
+
+// Runs automatically before main() (constructor attribute) and registers the embedded array and
+// its byte size under the kernel's lookup name, which for VPQ also encodes pq_bits and pq_len.
+__attribute__((__constructor__)) static void register_search_multi_cta_kernel_vpq_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@()
+{
+  registerAlgorithm(
+    "search_multi_cta_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd",
+    embedded_search_multi_cta_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@,
+    sizeof(embedded_search_multi_cta_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@));
+}
+
+#endif
diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp
new file mode 100644
index 0000000000..86243b8168
--- /dev/null
+++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp
@@ -0,0 +1,142 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+// Include tags header before namespace (it defines a namespace)
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+// Use nested namespace syntax to allow inclusion from within parent namespace
+namespace cuvs {
+namespace neighbors {
+namespace cagra {
+namespace detail {
+namespace multi_cta_search {
+
+// Planner for the multi-CTA CAGRA search kernel.
+//
+// It builds the string names used to look up the kernel entry point and the device-function
+// fragments it is linked with (setup_workspace, compute_distance, sample_filter). Every name
+// is assembled from the metric, team size, dataset block dim and, for VPQ, pq_bits / pq_len:
+//   {prefix}[vpq_]{metric}_t{team_size}_dim{dataset_block_dim}[_{pq_bits}pq_{pq_len}subd]
+// The device-function names additionally end in "_" + make_fragment_key().
+template
+struct CagraMultiCtaSearchPlanner : AlgorithmPlanner {
+  // Builds the entry-point name and passes it, together with the fragment key, to
+  // AlgorithmPlanner. The name is built twice because the base class is initialized before
+  // the entrypoint_name_ member exists.
+  CagraMultiCtaSearchPlanner(cuvs::distance::DistanceType metric,
+                             uint32_t team_size,
+                             uint32_t dataset_block_dim,
+                             bool is_vpq      = false,
+                             uint32_t pq_bits = 0,
+                             uint32_t pq_len  = 0)
+    : AlgorithmPlanner(
+        build_entrypoint_name(metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len),
+        is_vpq ? make_fragment_key()
+               : make_fragment_key()),
+      entrypoint_name_(
+        build_entrypoint_name(metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len))
+  {
+  }
+
+  // Name of the kernel entry point, as computed in the constructor.
+  const std::string& get_entrypoint_name() const { return entrypoint_name_; }
+
+  // Appends the name of the matching "setup_workspace_" fragment to device_functions.
+  // pq_bits and pq_len are only used when is_vpq is true.
+  void add_setup_workspace_device_function(cuvs::distance::DistanceType metric,
+                                           uint32_t team_size,
+                                           uint32_t dataset_block_dim,
+                                           bool is_vpq,
+                                           uint32_t pq_bits = 0,
+                                           uint32_t pq_len  = 0)
+  {
+    std::string key = "setup_workspace_";
+    if (is_vpq) {
+      key += "vpq_";
+      using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half;
+      auto params       = make_fragment_key();
+      key += metric_to_string(metric);
+      key += "_t" + std::to_string(team_size);
+      key += "_dim" + std::to_string(dataset_block_dim);
+      key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd";
+      key += "_" + params;
+    } else {
+      auto params = make_fragment_key();
+      key += metric_to_string(metric);
+      key += "_t" + std::to_string(team_size);
+      key += "_dim" + std::to_string(dataset_block_dim);
+      key += "_" + params;
+    }
+    this->device_functions.push_back(key);
+  }
+
+  // Appends the name of the matching "compute_distance_" fragment to device_functions.
+  // Same naming scheme as add_setup_workspace_device_function, with a different prefix.
+  void add_compute_distance_device_function(cuvs::distance::DistanceType metric,
+                                            uint32_t team_size,
+                                            uint32_t dataset_block_dim,
+                                            bool is_vpq,
+                                            uint32_t pq_bits = 0,
+                                            uint32_t pq_len  = 0)
+  {
+    std::string key = "compute_distance_";
+    if (is_vpq) {
+      key += "vpq_";
+      using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half;
+      auto params       = make_fragment_key();
+      key += metric_to_string(metric);
+      key += "_t" + std::to_string(team_size);
+      key += "_dim" + std::to_string(dataset_block_dim);
+      key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd";
+      key += "_" + params;
+    } else {
+      auto params = make_fragment_key();
+      key += metric_to_string(metric);
+      key += "_t" + std::to_string(team_size);
+      key += "_dim" + std::to_string(dataset_block_dim);
+      key += "_" + params;
+    }
+    this->device_functions.push_back(key);
+  }
+
+  // Appends "sample_filter_" + filter_name to device_functions.
+  void add_sample_filter_device_function(std::string filter_name)
+  {
+    this->device_functions.push_back("sample_filter_" + filter_name);
+  }
+
+ private:
+  std::string entrypoint_name_;
+
+  // Assembles "search_multi_cta_kernel_[vpq_]{metric}_t{team_size}_dim{dataset_block_dim}"
+  // and, for VPQ, the "_{pq_bits}pq_{pq_len}subd" suffix. This is the same string the .cu.in
+  // templates pass to registerAlgorithm.
+  static std::string build_entrypoint_name(cuvs::distance::DistanceType metric,
+                                           uint32_t team_size,
+                                           uint32_t dataset_block_dim,
+                                           bool is_vpq,
+                                           uint32_t pq_bits,
+                                           uint32_t pq_len)
+  {
+    std::string name = "search_multi_cta_kernel_";
+    if (is_vpq) { name += "vpq_"; }
+    name += metric_to_string(metric);
+    name += "_t" + std::to_string(team_size);
+    name += "_dim" + std::to_string(dataset_block_dim);
+    if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; }
+    return name;
+  }
+
+  // Maps a metric to the token used inside fragment and kernel names.
+  // L2Expanded and L2Unexpanded both map to "L2Expanded".
+  // NOTE(review): any other metric yields "Unknown", which presumably matches no registered
+  // fragment and so only fails later at lookup time - confirm whether that is intended.
+  static std::string metric_to_string(cuvs::distance::DistanceType metric)
+  {
+    switch (metric) {
+      case cuvs::distance::DistanceType::L2Expanded:
+      case cuvs::distance::DistanceType::L2Unexpanded: return "L2Expanded";
+      case cuvs::distance::DistanceType::InnerProduct: return "InnerProduct";
+      case cuvs::distance::DistanceType::CosineExpanded: return "CosineExpanded";
+      case cuvs::distance::DistanceType::BitwiseHamming: return "BitwiseHamming";
+      default: return "Unknown";
+    }
+  }
+};
+
+}  // namespace multi_cta_search
+}  // namespace detail
+}  // namespace cagra
+}  // namespace neighbors
+}  // namespace cuvs
diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh
new file mode 100644
index 0000000000..5fb4369fd8
--- /dev/null
+++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh
@@ -0,0 +1,349 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance-ext.cuh" +#include "../device_common.hpp" +#include "../hashmap.hpp" +#include "../utils.hpp" +// Note: The .cu.in files include the -impl.cuh files directly when instantiating kernels + +#include // For DistanceType enum +#include // For raft::upper_bound + +#include +#include +#include // For std::is_same_v, std::true_type, std::false_type + +// Include extern function declarations before namespace so they're available to kernel definitions +#include "extern_device_functions.cuh" +#include "filter_data.h" + +namespace cuvs::neighbors::cagra::detail::multi_kernel_search { + +// Helper to check if DescriptorT has kPqBits (VPQ descriptor) +template +struct has_kpq_bits { + template + static auto test(int) -> decltype(U::kPqBits, std::true_type{}); + template + static std::false_type test(...); + static constexpr bool value = decltype(test(0))::value; +}; + +template +inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; + +// JIT version of random_pickup_kernel - uses extern functions +template +RAFT_KERNEL random_pickup_kernel_jit( + const DescriptorT* dataset_desc, // Concrete descriptor type from template + const typename DescriptorT::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const std::size_t num_pickup, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DescriptorT::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DescriptorT::INDEX_T* const result_indices_ptr, // [num_queries, ldr] + typename DescriptorT::DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] + const std::uint32_t ldr, // (*) ldr >= num_pickup + typename DescriptorT::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] + const std::uint32_t hash_bitlen) +{ + using DATA_T = typename DescriptorT::DATA_T; + using INDEX_T = typename DescriptorT::INDEX_T; + using DISTANCE_T = typename 
DescriptorT::DISTANCE_T; + + const auto team_size_bits = dataset_desc->team_size_bitshift(); + const auto ldb = hashmap::get_size(hash_bitlen); + const auto global_team_index = (blockIdx.x * blockDim.x + threadIdx.x) >> team_size_bits; + const uint32_t query_id = blockIdx.y; + if (global_team_index >= num_pickup) { return; } + extern __shared__ uint8_t smem[]; + + // Set smem working buffer for the distance calculation using extern function + // setup_workspace copies the descriptor to shared memory and returns pointer to smem descriptor + // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) + const DescriptorT* smem_desc = nullptr; + // Check if DescriptorT is a standard_dataset_descriptor_t by checking if it doesn't have kPqBits + // (standard descriptors don't have kPqBits, VPQ descriptors do) + if constexpr (!has_kpq_bits_v) { + // Standard descriptor - use the metric from the descriptor type itself + // DescriptorT should already be standard_dataset_descriptor_t where Metric matches + // DescriptorT::kMetric + smem_desc = setup_workspace_standard(dataset_desc, smem, queries_ptr, query_id); + } else { + // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself + // DescriptorT should already be cagra_q_dataset_descriptor_t where Metric matches + // DescriptorT::kMetric + smem_desc = setup_workspace_vpq(dataset_desc, smem, queries_ptr, query_id); + } + __syncthreads(); + + // Load args once for better performance (avoid repeated loads in the loop) + using args_t = typename cuvs::neighbors::cagra::detail:: + dataset_descriptor_base_t::args_t; + const args_t args = smem_desc->args.load(); + + INDEX_T best_index_team_local; + DISTANCE_T best_norm2_team_local = utils::get_max_value(); + for (unsigned i = 0; i < num_distilation; i++) { + INDEX_T seed_index; + if (seed_ptr && (global_team_index < num_seeds)) { + seed_index = seed_ptr[global_team_index + (num_seeds * query_id)]; + } else { + // Chose a seed 
node randomly + seed_index = + device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % smem_desc->size; + } + + // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum + // Otherwise warp shuffles will hang. Each thread calls the extern function to get + // its per-thread distance, then team_sum reduces across all threads in the team. + DistanceT per_thread_norm2 = 0; + if constexpr (!has_kpq_bits_v) { + // Standard descriptor - use the metric from the descriptor type itself + per_thread_norm2 = compute_distance_standard(args, seed_index); + } else { + // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself + per_thread_norm2 = compute_distance_vpq(args, seed_index); + } + // Now ALL threads in the team participate in team_sum + const auto norm2 = device::team_sum(per_thread_norm2, team_size_bits); + + if (norm2 < best_norm2_team_local) { + best_norm2_team_local = norm2; + best_index_team_local = seed_index; + } + } + + const auto store_gmem_index = global_team_index + (ldr * query_id); + if ((threadIdx.x & ((1u << team_size_bits) - 1u)) == 0) { + if (hashmap::insert( + visited_hashmap_ptr + (ldb * query_id), hash_bitlen, best_index_team_local)) { + result_distances_ptr[store_gmem_index] = best_norm2_team_local; + result_indices_ptr[store_gmem_index] = best_index_team_local; + } else { + result_distances_ptr[store_gmem_index] = utils::get_max_value(); + result_indices_ptr[store_gmem_index] = utils::get_max_value(); + } + } +} + +// JIT version of compute_distance_to_child_nodes_kernel - uses extern functions +template +RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( + const typename DescriptorT::INDEX_T* const parent_node_list, // [num_queries, search_width] + typename DescriptorT::INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] + typename DescriptorT::DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const std::size_t lds, + const 
std::uint32_t search_width, + const DescriptorT* dataset_desc, // Concrete descriptor type from template + const typename DescriptorT::INDEX_T* const neighbor_graph_ptr, // [dataset_size, graph_degree] + const std::uint32_t graph_degree, + const SourceIndexT* source_indices_ptr, + const typename DescriptorT::DATA_T* query_ptr, // [num_queries, data_dim] + typename DescriptorT::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + const std::uint32_t hash_bitlen, + typename DescriptorT::INDEX_T* const result_indices_ptr, // [num_queries, ldd] + typename DescriptorT::DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + SAMPLE_FILTER_T sample_filter) +{ + using INDEX_T = typename DescriptorT::INDEX_T; + using DISTANCE_T = typename DescriptorT::DISTANCE_T; + using DataT = typename DescriptorT::DATA_T; + + const auto team_size_bits = dataset_desc->team_size_bitshift(); + const auto team_size = 1u << team_size_bits; + const uint32_t ldb = hashmap::get_size(hash_bitlen); + const auto tid = threadIdx.x + blockDim.x * blockIdx.x; + const auto global_team_id = tid >> team_size_bits; + const auto query_id = blockIdx.y; + + extern __shared__ uint8_t smem[]; + // Load a query using extern function + // setup_workspace copies the descriptor to shared memory and returns pointer to smem descriptor + // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) + const DescriptorT* smem_desc = nullptr; + if constexpr (!has_kpq_bits_v) { + // Standard descriptor - use the metric from the descriptor type itself + smem_desc = setup_workspace_standard(dataset_desc, smem, query_ptr, query_id); + } else { + // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself + smem_desc = setup_workspace_vpq(dataset_desc, smem, query_ptr, query_id); + } + + __syncthreads(); + if (global_team_id >= search_width * graph_degree) { return; } + + const 
std::size_t parent_list_index = + parent_node_list[global_team_id / graph_degree + (search_width * blockIdx.y)]; + + if (parent_list_index == utils::get_max_value()) { return; } + + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const auto raw_parent_index = parent_candidates_ptr[parent_list_index + (lds * query_id)]; + + if (raw_parent_index == utils::get_max_value()) { + result_distances_ptr[ldd * blockIdx.y + global_team_id] = utils::get_max_value(); + return; + } + const auto parent_index = raw_parent_index & ~index_msb_1_mask; + + const auto neighbor_list_head_ptr = neighbor_graph_ptr + (graph_degree * parent_index); + + const std::size_t child_id = neighbor_list_head_ptr[global_team_id % graph_degree]; + + const auto compute_distance_flag = hashmap::insert( + team_size, visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id); + + // Load args once for better performance (avoid repeated loads) + using args_t = typename cuvs::neighbors::cagra::detail:: + dataset_descriptor_base_t::args_t; + const args_t args = smem_desc->args.load(); + + // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum + // Otherwise warp shuffles will hang. Each thread calls the extern function to get + // its per-thread distance, then team_sum reduces across all threads in the team. 
+ DISTANCE_T per_thread_norm2 = 0; + if (compute_distance_flag) { + if constexpr (!has_kpq_bits_v) { + // Standard descriptor - use the metric from the descriptor type itself + per_thread_norm2 = compute_distance_standard(args, child_id); + } else { + // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself + per_thread_norm2 = compute_distance_vpq(args, child_id); + } + } + // Now ALL threads in the team participate in team_sum + DISTANCE_T norm2 = device::team_sum(per_thread_norm2, team_size_bits); + + if (compute_distance_flag) { + if ((threadIdx.x & (team_size - 1)) == 0) { + result_indices_ptr[ldd * blockIdx.y + global_team_id] = child_id; + result_distances_ptr[ldd * blockIdx.y + global_team_id] = norm2; + } + } else { + if ((threadIdx.x & (team_size - 1)) == 0) { + result_distances_ptr[ldd * blockIdx.y + global_team_id] = utils::get_max_value(); + } + } + + if constexpr (!std::is_same::value) { + if (!sample_filter( + query_id, + source_indices_ptr == nullptr ? 
parent_index : source_indices_ptr[parent_index])) { + parent_candidates_ptr[parent_list_index + (lds * query_id)] = utils::get_max_value(); + parent_distance_ptr[parent_list_index + (lds * query_id)] = + utils::get_max_value(); + } + } +} + +// JIT version of apply_filter_kernel - uses extern sample_filter function +// Bitset data is passed as kernel parameters (matching non-JIT where filter object contains +// bitset_view) The bitset data is in global memory (not shared memory), just like non-JIT +template +RAFT_KERNEL apply_filter_kernel_jit( + const SourceIndexT* source_indices_ptr, // [num_queries, search_width] + IndexT* const result_indices_ptr, + DistanceT* const result_distances_ptr, + const std::size_t lds, + const std::uint32_t result_buffer_size, + const std::uint32_t num_queries, + const IndexT query_id_offset, + uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) - in global memory + SourceIndexT bitset_len, // Bitset length + SourceIndexT original_nbits) // Original number of bits +{ + constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const auto tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= result_buffer_size * num_queries) { return; } + const auto i = tid % result_buffer_size; + const auto j = tid / result_buffer_size; + const auto index = i + j * lds; + + if (result_indices_ptr[index] != ~index_msb_1_mask) { + // Use extern sample_filter function with 3 params: query_id, node_id, filter_data + // filter_data is a void* pointer to bitset_filter_data_t (or nullptr for none_filter) + SourceIndexT node_id = source_indices_ptr == nullptr + ? 
static_cast(result_indices_ptr[index]) + : source_indices_ptr[result_indices_ptr[index]]; + + // Construct filter_data struct in registers (bitset data is in global memory) + cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( + bitset_ptr, bitset_len, original_nbits); + + if (!cuvs::neighbors::cagra::detail::sample_filter( + query_id_offset + j, node_id, bitset_ptr != nullptr ? &filter_data : nullptr)) { + result_indices_ptr[index] = utils::get_max_value(); + result_distances_ptr[index] = utils::get_max_value(); + } + } +} + +} // namespace cuvs::neighbors::cagra::detail::multi_kernel_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp new file mode 100644 index 0000000000..226c6c8a1f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -0,0 +1,144 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +// Include tags header before namespace (it defines a namespace) +#include + +#include +#include +#include +#include +#include + +// Use nested namespace syntax to allow inclusion from within parent namespace +namespace cuvs::neighbors::cagra::detail { +namespace multi_kernel_search { + +template +struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { + CagraMultiKernelSearchPlanner(cuvs::distance::DistanceType metric, + const std::string& kernel_name, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq = false, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + : AlgorithmPlanner( + build_entrypoint_name( + kernel_name, metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len), + is_vpq ? 
make_fragment_key() + : make_fragment_key()), + entrypoint_name_(build_entrypoint_name( + kernel_name, metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len)) + { + } + + const std::string& get_entrypoint_name() const { return entrypoint_name_; } + + void add_setup_workspace_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + { + std::string key = "setup_workspace_"; + if (is_vpq) { + key += "vpq_"; + using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; + auto params = make_fragment_key(); + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + key += "_" + params; + } else { + auto params = make_fragment_key(); + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + params; + } + this->device_functions.push_back(key); + } + + void add_compute_distance_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + { + std::string key = "compute_distance_"; + if (is_vpq) { + key += "vpq_"; + using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; + auto params = make_fragment_key(); + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + key += "_" + params; + } else { + auto params = make_fragment_key(); + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + params; + } + this->device_functions.push_back(key); + } + 
+ void add_sample_filter_device_function(std::string filter_name) + { + this->device_functions.push_back("sample_filter_" + filter_name); + } + + void set_entrypoint_name(const std::string& name) { entrypoint_name_ = name; } + + private: + std::string entrypoint_name_; + + static std::string build_entrypoint_name(const std::string& kernel_name, + cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits, + uint32_t pq_len) + { + // Special case: apply_filter_kernel doesn't use dataset_descriptor, so no suffixes needed + if (kernel_name == "apply_filter_kernel") { return kernel_name; } + + std::string name = kernel_name; + if (is_vpq) { name += "_vpq"; } + name += "_" + metric_to_string(metric); + name += "_t" + std::to_string(team_size); + name += "_dim" + std::to_string(dataset_block_dim); + if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } + return name; + } + + static std::string metric_to_string(cuvs::distance::DistanceType metric) + { + switch (metric) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2Unexpanded: return "L2Expanded"; + case cuvs::distance::DistanceType::InnerProduct: return "InnerProduct"; + case cuvs::distance::DistanceType::CosineExpanded: return "CosineExpanded"; + case cuvs::distance::DistanceType::BitwiseHamming: return "BitwiseHamming"; + default: return "Unknown"; + } + } +}; + +} // namespace multi_kernel_search +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in new file mode 100644 index 0000000000..d8e1e0ee94 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -0,0 +1,42 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Instantiate the search_kernel_jit function with concrete descriptor type +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; +template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( + uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search + +#else + +#include +#include +#include "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() +{ + registerAlgorithm( + "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@", + 
embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh new file mode 100644 index 0000000000..4f76c1b224 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -0,0 +1,728 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance-ext.cuh" +#include "../compute_distance_standard-impl.cuh" +#include "../compute_distance_vpq-impl.cuh" +#include "../device_common.hpp" +#include "../hashmap.hpp" +#include "../search_single_cta_kernel-inl.cuh" +#include "../utils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../bitonic.hpp" +#include "../search_plan.cuh" +#include "../topk_by_radix.cuh" +#include "../topk_for_cagra/topk.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // For std::is_same_v +#include + +// Include extern function declarations before namespace so they're available to kernel definitions +#include "extern_device_functions.cuh" +#include "filter_data.h" +// Include shared JIT device functions +#include "device_common_jit.cuh" + +namespace 
cuvs::neighbors::cagra::detail::single_cta_search { + +// Helper to check if DescriptorT has kPqBits (VPQ descriptor) - use shared version +// Use fully qualified name since it's a template variable +using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v; + +// Note: Helper functions (pickup_next_parents, move_invalid_to_end_of_list, hashmap_restore) +// are defined in search_single_cta_kernel-inl.cuh which is included by the launcher. +// We don't redefine them here to avoid duplicate definitions. + +// Sample filter extern function +// sample_filter is declared in extern_device_functions.cuh + +// JIT versions of compute_distance_to_random_nodes and compute_distance_to_child_nodes +// are now shared in device_common_jit.cuh - use fully qualified names +using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit; +using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; + +// JIT version of search_core - uses extern functions with descriptor pointer +template +RAFT_DEVICE_INLINE_FUNCTION void search_core( + uintptr_t result_indices_ptr, + DistanceT* const result_distances_ptr, + const std::uint32_t top_k, + const DataT* const queries_ptr, + const IndexT* const knn_graph, + const std::uint32_t graph_degree, + const SourceIndexT* source_indices_ptr, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const IndexT* seed_ptr, + const uint32_t num_seeds, + IndexT* const visited_hashmap_ptr, + const std::uint32_t max_candidates, + const std::uint32_t max_itopk, + const std::uint32_t internal_topk, + const std::uint32_t search_width, + const std::uint32_t min_iteration, + const std::uint32_t max_iteration, + std::uint32_t* const num_executed_iterations, + const std::uint32_t hash_bitlen, + const std::uint32_t small_hash_bitlen, + const std::uint32_t small_hash_reset_interval, + const std::uint32_t query_id, + const DescriptorT* dataset_desc, // Concrete descriptor type from template + uint32_t* 
bitset_ptr, // Bitset data pointer (nullptr for none_filter) + SourceIndexT bitset_len, // Bitset length + SourceIndexT original_nbits) // Original number of bits +{ + using LOAD_T = device::LOAD_128BIT_T; + + auto to_source_index = [source_indices_ptr](IndexT x) { + return source_indices_ptr == nullptr ? static_cast(x) : source_indices_ptr[x]; + }; + +#ifdef _CLK_BREAKDOWN + std::uint64_t clk_init = 0; + std::uint64_t clk_compute_1st_distance = 0; + std::uint64_t clk_topk = 0; + std::uint64_t clk_reset_hash = 0; + std::uint64_t clk_pickup_parents = 0; + std::uint64_t clk_restore_hash = 0; + std::uint64_t clk_compute_distance = 0; + std::uint64_t clk_start; +#define _CLK_START() clk_start = clock64() +#define _CLK_REC(V) V += clock64() - clk_start; +#else +#define _CLK_START() +#define _CLK_REC(V) +#endif + _CLK_START(); + + extern __shared__ uint8_t smem[]; + + // Layout of result_buffer + const auto result_buffer_size = internal_topk + (search_width * graph_degree); + const auto result_buffer_size_32 = raft::round_up_safe(result_buffer_size, 32); + const auto small_hash_size = hashmap::get_size(small_hash_bitlen); + + // Get smem_ws_size_in_bytes using static method (dim is in descriptor args) + uint32_t dim = dataset_desc->args.dim; + uint32_t smem_ws_size_in_bytes = DescriptorT::get_smem_ws_size_in_bytes(dim); + + // Set smem working buffer for the distance calculation using extern function + // setup_workspace copies the descriptor to shared memory and returns pointer to smem descriptor + // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) + const DescriptorT* smem_desc = nullptr; + // Check if DescriptorT is a standard_dataset_descriptor_t by checking if it doesn't have kPqBits + // (standard descriptors don't have kPqBits, VPQ descriptors do) + if constexpr (!has_kpq_bits_v) { + // Standard descriptor - use the metric from the descriptor type itself + // DescriptorT should already be standard_dataset_descriptor_t where Metric 
matches + // DescriptorT::kMetric + smem_desc = setup_workspace_standard(dataset_desc, smem, queries_ptr, query_id); + } else { + // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself + // DescriptorT should already be cagra_q_dataset_descriptor_t where Metric matches + // DescriptorT::kMetric + smem_desc = setup_workspace_vpq(dataset_desc, smem, queries_ptr, query_id); + } + + auto* __restrict__ result_indices_buffer = + reinterpret_cast(smem + smem_ws_size_in_bytes); + auto* __restrict__ result_distances_buffer = + reinterpret_cast(result_indices_buffer + result_buffer_size_32); + auto* __restrict__ visited_hash_buffer = + reinterpret_cast(result_distances_buffer + result_buffer_size_32); + auto* __restrict__ parent_list_buffer = + reinterpret_cast(visited_hash_buffer + small_hash_size); + auto* __restrict__ topk_ws = reinterpret_cast(parent_list_buffer + search_width); + auto* terminate_flag = reinterpret_cast(topk_ws + 3); + auto* __restrict__ smem_work_ptr = reinterpret_cast(terminate_flag + 1); + + // A flag for filtering. + auto filter_flag = terminate_flag; + + if (threadIdx.x == 0) { + terminate_flag[0] = 0; + topk_ws[0] = ~0u; + } + + // Init hashmap + IndexT* local_visited_hashmap_ptr; + if (small_hash_bitlen) { + local_visited_hashmap_ptr = visited_hash_buffer; + } else { + local_visited_hashmap_ptr = visited_hashmap_ptr + (hashmap::get_size(hash_bitlen) * blockIdx.y); + } + hashmap::init(local_visited_hashmap_ptr, hash_bitlen, 0); + __syncthreads(); + _CLK_REC(clk_init); + + // compute distance to randomly selecting nodes using JIT version + _CLK_START(); + const IndexT* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; + IndexT dataset_size = smem_desc->size; + compute_distance_to_random_nodes_jit( + result_indices_buffer, + result_distances_buffer, + smem_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0); + __syncthreads(); + _CLK_REC(clk_compute_1st_distance); + + std::uint32_t iter = 0; + while (1) { + // sort + if constexpr (TOPK_BY_BITONIC_SORT) { + assert(blockDim.x >= 64); + const bool bitonic_sort_and_full_multi_warps = (max_candidates > 128) ? true : false; + + // reset small-hash table. + if ((iter + 1) % small_hash_reset_interval == 0) { + _CLK_START(); + unsigned hash_start_tid; + if (blockDim.x == 32) { + hash_start_tid = 0; + } else if (blockDim.x == 64) { + if (bitonic_sort_and_full_multi_warps || BITONIC_SORT_AND_MERGE_MULTI_WARPS) { + hash_start_tid = 0; + } else { + hash_start_tid = 32; + } + } else { + if (bitonic_sort_and_full_multi_warps || BITONIC_SORT_AND_MERGE_MULTI_WARPS) { + hash_start_tid = 64; + } else { + hash_start_tid = 32; + } + } + hashmap::init(local_visited_hashmap_ptr, hash_bitlen, hash_start_tid); + _CLK_REC(clk_reset_hash); + } + + // topk with bitonic sort + _CLK_START(); + // For JIT version, we always check filter_flag at runtime since sample_filter is extern + if (*filter_flag != 0) { + // Move the filtered out index to the end of the itopk list + for (unsigned i = 0; i < search_width; i++) { + move_invalid_to_end_of_list( + result_indices_buffer, result_distances_buffer, internal_topk); + } + if (threadIdx.x == 0) { *terminate_flag = 0; } + } + topk_by_bitonic_sort_and_merge( + result_distances_buffer, + result_indices_buffer, + max_itopk, + internal_topk, + result_distances_buffer + internal_topk, + result_indices_buffer + internal_topk, + max_candidates, + search_width * graph_degree, + topk_ws, + (iter == 0)); + __syncthreads(); + _CLK_REC(clk_topk); + } else { + _CLK_START(); + 
// topk with radix block sort + topk_by_radix_sort{}(max_itopk, + internal_topk, + result_buffer_size, + reinterpret_cast(result_distances_buffer), + result_indices_buffer, + reinterpret_cast(result_distances_buffer), + result_indices_buffer, + nullptr, + topk_ws, + true, + smem_work_ptr); + _CLK_REC(clk_topk); + + // reset small-hash table + if ((iter + 1) % small_hash_reset_interval == 0) { + _CLK_START(); + hashmap::init(local_visited_hashmap_ptr, hash_bitlen); + _CLK_REC(clk_reset_hash); + } + } + __syncthreads(); + + if (iter + 1 == max_iteration) { break; } + + // pick up next parents + if (threadIdx.x < 32) { + _CLK_START(); + pickup_next_parents( + terminate_flag, parent_list_buffer, result_indices_buffer, internal_topk, search_width); + _CLK_REC(clk_pickup_parents); + } + + // restore small-hash table by putting internal-topk indices in it + if ((iter + 1) % small_hash_reset_interval == 0) { + const unsigned first_tid = ((blockDim.x <= 32) ? 0 : 32); + _CLK_START(); + hashmap_restore( + local_visited_hashmap_ptr, hash_bitlen, result_indices_buffer, internal_topk, first_tid); + _CLK_REC(clk_restore_hash); + } + __syncthreads(); + + if (*terminate_flag && iter >= min_iteration) { break; } + + __syncthreads(); + // compute the norms between child nodes and query node using JIT version + _CLK_START(); + compute_distance_to_child_nodes_jit( + result_indices_buffer + internal_topk, + result_distances_buffer + internal_topk, + smem_desc, + knn_graph, + graph_degree, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0, + parent_list_buffer, + result_indices_buffer, + search_width); + // Critical: __syncthreads() must be reached by ALL threads + // If any thread is stuck in compute_distance_to_child_nodes_jit, this will hang + __syncthreads(); + _CLK_REC(clk_compute_distance); + + // Filtering - use extern sample_filter function + if (threadIdx.x == 0) { *filter_flag = 0; } + __syncthreads(); + + constexpr IndexT index_msb_1_mask = 
utils::gen_index_msb_1_mask::value; + const IndexT invalid_index = utils::get_max_value(); + + for (unsigned p = threadIdx.x; p < search_width; p += blockDim.x) { + if (parent_list_buffer[p] != invalid_index) { + const auto parent_id = result_indices_buffer[parent_list_buffer[p]] & ~index_msb_1_mask; + // Construct filter_data struct (bitset data is in global memory) + cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( + bitset_ptr, bitset_len, original_nbits); + if (!sample_filter(query_id, + to_source_index(parent_id), + bitset_ptr != nullptr ? &filter_data : nullptr)) { + result_distances_buffer[parent_list_buffer[p]] = utils::get_max_value(); + result_indices_buffer[parent_list_buffer[p]] = invalid_index; + *filter_flag = 1; + } + } + } + __syncthreads(); + + iter++; + } + + // Post process for filtering - use extern sample_filter function + constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const IndexT invalid_index = utils::get_max_value(); + + for (unsigned i = threadIdx.x; i < internal_topk + search_width * graph_degree; i += blockDim.x) { + const auto node_id = result_indices_buffer[i] & ~index_msb_1_mask; + // Construct filter_data struct (bitset data is in global memory) + cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( + bitset_ptr, bitset_len, original_nbits); + if (node_id != (invalid_index & ~index_msb_1_mask) && + !sample_filter( + query_id, to_source_index(node_id), bitset_ptr != nullptr ? 
&filter_data : nullptr)) { + result_distances_buffer[i] = utils::get_max_value(); + result_indices_buffer[i] = invalid_index; + } + } + + __syncthreads(); + // Move invalid index items to the end of the buffer without sorting the entire buffer + using scan_op_t = cub::WarpScan; + auto& temp_storage = *reinterpret_cast(smem_work_ptr); + + constexpr std::uint32_t warp_size = 32; + if (threadIdx.x < warp_size) { + std::uint32_t num_found_valid = 0; + for (std::uint32_t buffer_offset = 0; buffer_offset < internal_topk; + buffer_offset += warp_size) { + const auto src_position = buffer_offset + threadIdx.x; + const std::uint32_t is_valid_index = + (result_indices_buffer[src_position] & (~index_msb_1_mask)) == invalid_index ? 0 : 1; + std::uint32_t new_position; + scan_op_t(temp_storage).InclusiveSum(is_valid_index, new_position); + if (is_valid_index) { + const auto dst_position = num_found_valid + (new_position - 1); + result_indices_buffer[dst_position] = result_indices_buffer[src_position]; + result_distances_buffer[dst_position] = result_distances_buffer[src_position]; + } + + num_found_valid += new_position; + for (std::uint32_t offset = (warp_size >> 1); offset > 0; offset >>= 1) { + const auto v = raft::shfl_xor(num_found_valid, offset); + if ((threadIdx.x & offset) == 0) { num_found_valid = v; } + } + + if (num_found_valid >= top_k) { break; } + } + + if (num_found_valid < top_k) { + for (std::uint32_t i = num_found_valid + threadIdx.x; i < internal_topk; i += warp_size) { + result_indices_buffer[i] = invalid_index; + result_distances_buffer[i] = utils::get_max_value(); + } + } + } + + // If the sufficient number of valid indexes are not in the internal topk, pick up from the + // candidate list. 
+ if (top_k > internal_topk || result_indices_buffer[top_k - 1] == invalid_index) { + __syncthreads(); + topk_by_bitonic_sort_and_merge( + result_distances_buffer, + result_indices_buffer, + max_itopk, + internal_topk, + result_distances_buffer + internal_topk, + result_indices_buffer + internal_topk, + max_candidates, + search_width * graph_degree, + topk_ws, + (iter == 0)); + } + __syncthreads(); + + // NB: The indices pointer is tagged with its element size. + const uint32_t index_element_tag = result_indices_ptr & 0x3; + result_indices_ptr ^= index_element_tag; + auto write_indices = + index_element_tag == 3 + ? [](uintptr_t ptr, + uint32_t i, + SourceIndexT x) { reinterpret_cast(ptr)[i] = static_cast(x); } + : index_element_tag == 2 + ? [](uintptr_t ptr, + uint32_t i, + SourceIndexT x) { reinterpret_cast(ptr)[i] = static_cast(x); } + : index_element_tag == 1 + ? [](uintptr_t ptr, + uint32_t i, + SourceIndexT x) { reinterpret_cast(ptr)[i] = static_cast(x); } + : [](uintptr_t ptr, uint32_t i, SourceIndexT x) { + reinterpret_cast(ptr)[i] = static_cast(x); + }; + for (std::uint32_t i = threadIdx.x; i < top_k; i += blockDim.x) { + unsigned j = i + (top_k * query_id); + unsigned ii = i; + if constexpr (TOPK_BY_BITONIC_SORT) { ii = device::swizzling(i); } + if (result_distances_ptr != nullptr) { result_distances_ptr[j] = result_distances_buffer[ii]; } + + auto internal_index = + result_indices_buffer[ii] & ~index_msb_1_mask; // clear most significant bit + auto source_index = to_source_index(internal_index); + write_indices(result_indices_ptr, j, source_index); + } + if (threadIdx.x == 0 && num_executed_iterations != nullptr) { + num_executed_iterations[query_id] = iter + 1; + } +#ifdef _CLK_BREAKDOWN + if ((threadIdx.x == 0 || threadIdx.x == blockDim.x - 1) && ((query_id * 3) % gridDim.y < 3)) { + printf( + "%s:%d " + "query, %d, thread, %d" + ", init, %lu" + ", 1st_distance, %lu" + ", topk, %lu" + ", reset_hash, %lu" + ", pickup_parents, %lu" + ", restore_hash, 
%lu" + ", distance, %lu" + "\n", + __FILE__, + __LINE__, + query_id, + threadIdx.x, + clk_init, + clk_compute_1st_distance, + clk_topk, + clk_reset_hash, + clk_pickup_parents, + clk_restore_hash, + clk_compute_distance); + } +#endif +} + +// JIT kernel wrapper - calls search_core +template +RAFT_KERNEL __launch_bounds__(1024, 1) + search_kernel_jit(uintptr_t result_indices_ptr, + DistanceT* const result_distances_ptr, + const std::uint32_t top_k, + const DataT* const queries_ptr, + const IndexT* const knn_graph, + const std::uint32_t graph_degree, + const SourceIndexT* source_indices_ptr, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const IndexT* seed_ptr, + const uint32_t num_seeds, + IndexT* const visited_hashmap_ptr, + const std::uint32_t max_candidates, + const std::uint32_t max_itopk, + const std::uint32_t internal_topk, + const std::uint32_t search_width, + const std::uint32_t min_iteration, + const std::uint32_t max_iteration, + std::uint32_t* const num_executed_iterations, + const std::uint32_t hash_bitlen, + const std::uint32_t small_hash_bitlen, + const std::uint32_t small_hash_reset_interval, + const DescriptorT* dataset_desc, // Concrete descriptor type from template + uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) + SourceIndexT bitset_len, // Bitset length + SourceIndexT original_nbits) // Original number of bits +{ + const auto query_id = blockIdx.y; + search_core(result_indices_ptr, + result_distances_ptr, + top_k, + queries_ptr, + knn_graph, + graph_degree, + source_indices_ptr, + num_distilation, + rand_xor_mask, + seed_ptr, + num_seeds, + visited_hashmap_ptr, + max_candidates, + max_itopk, + internal_topk, + search_width, + min_iteration, + max_iteration, + num_executed_iterations, + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + query_id, + dataset_desc, + bitset_ptr, + bitset_len, + original_nbits); +} + +// No separate JIT types needed - use non-JIT types directly +// Helper 
descriptor type for job_desc_t +template +struct job_desc_jit_helper_desc { + using DATA_T = DataT; + using INDEX_T = IndexT; + using DISTANCE_T = DistanceT; +}; + +// JIT persistent kernel - uses extern functions and JIT search_core +template +RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_p_jit( + worker_handle_t* worker_handles, + job_desc_t>* job_descriptors, + uint32_t* completion_counters, + const IndexT* const knn_graph, // [dataset_size, graph_degree] + const std::uint32_t graph_degree, + const SourceIndexT* source_indices_ptr, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const IndexT* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + IndexT* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + const std::uint32_t max_candidates, + const std::uint32_t max_itopk, + const std::uint32_t internal_topk, + const std::uint32_t search_width, + const std::uint32_t min_iteration, + const std::uint32_t max_iteration, + std::uint32_t* const num_executed_iterations, // [num_queries] + const std::uint32_t hash_bitlen, + const std::uint32_t small_hash_bitlen, + const std::uint32_t small_hash_reset_interval, + const DescriptorT* dataset_desc, // Concrete descriptor type from template + uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) + SourceIndexT bitset_len, // Bitset length + SourceIndexT original_nbits) // Original number of bits +{ + using job_desc_type = job_desc_t>; + __shared__ typename job_desc_type::input_t job_descriptor; + __shared__ worker_handle_t::data_t worker_data; + + auto& worker_handle = worker_handles[blockIdx.y].data; + uint32_t job_ix; + + while (true) { + // wait the writing phase + if (threadIdx.x == 0) { + worker_handle_t::data_t worker_data_local; + do { + worker_data_local = worker_handle.load(cuda::memory_order_relaxed); + } while (worker_data_local.handle == kWaitForWork); + if (worker_data_local.handle != kNoMoreWork) { + worker_handle.store({kWaitForWork}, 
cuda::memory_order_relaxed); + } + job_ix = worker_data_local.value.desc_id; + cuda::atomic_thread_fence(cuda::memory_order_acquire, cuda::thread_scope_system); + worker_data = worker_data_local; + } + if (threadIdx.x < raft::WarpSize) { + // Sync one warp and copy descriptor data + static_assert(job_desc_type::kBlobSize <= raft::WarpSize); + constexpr uint32_t kMaxJobsNum = 8192; + job_ix = raft::shfl(job_ix, 0); + if (threadIdx.x < job_desc_type::kBlobSize && job_ix < kMaxJobsNum) { + job_descriptor.blob[threadIdx.x] = job_descriptors[job_ix].input.blob[threadIdx.x]; + } + } + __syncthreads(); + if (worker_data.handle == kNoMoreWork) { break; } + + // reading phase + auto result_indices_ptr = job_descriptor.value.result_indices_ptr; + auto* result_distances_ptr = job_descriptor.value.result_distances_ptr; + auto* queries_ptr = job_descriptor.value.queries_ptr; + auto top_k = job_descriptor.value.top_k; + auto n_queries = job_descriptor.value.n_queries; + auto query_id = worker_data.value.query_id; + + // work phase - use JIT search_core + search_core(result_indices_ptr, + result_distances_ptr, + top_k, + queries_ptr, + knn_graph, + graph_degree, + source_indices_ptr, + num_distilation, + rand_xor_mask, + seed_ptr, + num_seeds, + visited_hashmap_ptr, + max_candidates, + max_itopk, + internal_topk, + search_width, + min_iteration, + max_iteration, + num_executed_iterations, + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + query_id, + dataset_desc, + bitset_ptr, + bitset_len, + original_nbits); + + // make sure all writes are visible even for the host + // (e.g. 
when result buffers are in pinned memory) + cuda::atomic_thread_fence(cuda::memory_order_release, cuda::thread_scope_system); + + // arrive to mark the end of the work phase + __syncthreads(); + if (threadIdx.x == 0) { + auto completed_count = atomicInc(completion_counters + job_ix, n_queries - 1) + 1; + if (completed_count >= n_queries) { + job_descriptors[job_ix].completion_flag.store(true, cuda::memory_order_relaxed); + } + } + } +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in new file mode 100644 index 0000000000..2fbae21acc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in @@ -0,0 +1,42 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Instantiate the search_kernel_p_jit function with concrete descriptor type +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; +template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( + worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search + +#else + +#include +#include +#include "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() +{ + registerAlgorithm( + "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@", + embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + 
sizeof(embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in new file mode 100644 index 0000000000..ad8e1792a9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in @@ -0,0 +1,43 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Instantiate the search_kernel_p_jit function with concrete VPQ descriptor type +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; +template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( + worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search + +#else + +#include +#include +#include 
"search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() +{ + registerAlgorithm( + "search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in new file mode 100644 index 0000000000..3f4f13e9ca --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in @@ -0,0 +1,43 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Instantiate the search_kernel_jit function with concrete VPQ descriptor type +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; +template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( + uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search + +#else + +#include +#include +#include "search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() +{ + registerAlgorithm( + "search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + 
embedded_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index 4f0492fea9..ae40c4cb6f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -5,23 +5,59 @@ #pragma once +// Include tags header before namespace (it defines a namespace) +#include + #include #include #include -#include #include #include +#include #include -namespace cuvs::neighbors::cagra::detail::single_cta_search { +// Use nested namespace syntax to allow inclusion from within parent namespace +namespace cuvs { +namespace neighbors { +namespace cagra { +namespace detail { +namespace single_cta_search { template struct CagraSearchPlanner : AlgorithmPlanner { - CagraSearchPlanner(bool topk_by_bitonic_sort, bool bitonic_sort_and_merge_multi_warps) - : AlgorithmPlanner("search_single_cta_kernel_" + bool_to_string(topk_by_bitonic_sort) + "_" + - bool_to_string(bitonic_sort_and_merge_multi_warps), - make_fragment_key()) + CagraSearchPlanner(cuvs::distance::DistanceType metric, + bool topk_by_bitonic_sort, + bool bitonic_sort_and_merge_multi_warps, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq = false, + uint32_t pq_bits = 0, + uint32_t pq_len = 0, + bool persistent = false) + : AlgorithmPlanner(build_entrypoint_name(metric, + topk_by_bitonic_sort, + 
bitonic_sort_and_merge_multi_warps, + team_size, + dataset_block_dim, + is_vpq, + pq_bits, + pq_len, + persistent), + is_vpq + ? make_fragment_key() + : make_fragment_key()) { + std::string kernel_type = persistent ? "persistent" : "regular"; + std::cerr << "[JIT] CagraSearchPlanner created for " << kernel_type + << " JIT kernel (topk_by_bitonic_sort=" << bool_to_string(topk_by_bitonic_sort) + << ", bitonic_sort_and_merge_multi_warps=" + << bool_to_string(bitonic_sort_and_merge_multi_warps) + << ", metric=" << metric_to_string(metric) << ")" << std::endl; + std::cerr.flush(); } void add_setup_workspace_device_function(cuvs::distance::DistanceType metric, @@ -32,10 +68,25 @@ struct CagraSearchPlanner : AlgorithmPlanner { uint32_t pq_len = 0) { std::string key = "setup_workspace_"; - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - if (is_vpq) { key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } + if (is_vpq) { + key += "vpq_"; + // For VPQ, include codebook type tag in template parameters + using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; + // Use template tags only for types, strings for integers/enums + auto params = make_fragment_key(); + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + key += "_" + params; + } else { + // Use template tags only for types, strings for integers/enums + auto params = make_fragment_key(); + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + params; + } this->device_functions.push_back(key); } @@ -47,10 +98,25 @@ struct CagraSearchPlanner : AlgorithmPlanner { uint32_t pq_len = 0) { std::string key = "compute_distance_"; - key += 
metric_to_string(metric); - key += "_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - if (is_vpq) { key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } + if (is_vpq) { + key += "vpq_"; + // For VPQ, include codebook type tag in template parameters + using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; + // Use template tags only for types, strings for integers/enums + auto params = make_fragment_key(); + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + key += "_" + params; + } else { + // Use template tags only for types, strings for integers/enums + auto params = make_fragment_key(); + key += metric_to_string(metric); + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + params; + } this->device_functions.push_back(key); } @@ -60,6 +126,27 @@ struct CagraSearchPlanner : AlgorithmPlanner { } private: + static std::string build_entrypoint_name(cuvs::distance::DistanceType metric, + bool topk_by_bitonic_sort, + bool bitonic_sort_and_merge_multi_warps, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits, + uint32_t pq_len, + bool persistent) + { + std::string name = (persistent ? "search_single_cta_kernel_p_" : "search_single_cta_kernel_"); + if (is_vpq) { name += "vpq_"; } + name += bool_to_string(topk_by_bitonic_sort) + "_"; + name += bool_to_string(bitonic_sort_and_merge_multi_warps) + "_"; + name += metric_to_string(metric); + name += "_t" + std::to_string(team_size); + name += "_dim" + std::to_string(dataset_block_dim); + if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } + return name; + } + static std::string bool_to_string(bool b) { return b ? 
"true" : "false"; } static std::string metric_to_string(cuvs::distance::DistanceType metric) @@ -75,4 +162,8 @@ struct CagraSearchPlanner : AlgorithmPlanner { } }; -} // namespace cuvs::neighbors::cagra::detail::single_cta_search +} // namespace single_cta_search +} // namespace detail +} // namespace cagra +} // namespace neighbors +} // namespace cuvs diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in index 98399e1355..1eeac8c6ba 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in @@ -9,13 +9,12 @@ #include -namespace cuvs::neighbors::cagra::detail::single_cta_search { +namespace cuvs::neighbors::cagra::detail { // Instantiate the setup_workspace_standard function for standard descriptor -template __device__ uint32_t setup_workspace_standard<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( - void*, const @data_type@*, uint32_t, const @data_type@*, @index_type@, uint32_t, uint32_t, const @distance_type@*); +template __device__ const cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>* setup_workspace_standard<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>(const cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); -} // namespace cuvs::neighbors::cagra::detail::single_cta_search +} // namespace cuvs::neighbors::cagra::detail #else @@ -30,7 +29,7 @@ __attribute__((__constructor__)) static void register_setup_workspace_standard_@ registerAlgorithm( - 
"setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@", + "setup_workspace_@metric_name@_t@team_size@_dim@dataset_block_dim@", embedded_setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, sizeof(embedded_setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh index 5283cc9659..30956a17be 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh @@ -12,44 +12,37 @@ #include #include -namespace cuvs::neighbors::cagra::detail::single_cta_search { +namespace cuvs::neighbors::cagra::detail { // Extern function implementation for setup_workspace_standard (standard descriptor) +// Takes the concrete descriptor pointer and calls the free function directly (not through function +// pointer) For JIT LTO, the descriptor's setup_workspace_impl is nullptr, so we must call the free +// function directly template -__device__ uint32_t setup_workspace_standard(void* smem, - const DataT* queries, - uint32_t query_id, - const DataT* dataset_ptr, - IndexT dataset_size, - uint32_t dim, - uint32_t ld, - const DistanceT* dataset_norms) +__device__ const cuvs::neighbors::cagra::detail:: + standard_dataset_descriptor_t* + setup_workspace_standard( + const cuvs::neighbors::cagra::detail:: + standard_dataset_descriptor_t* + desc, + void* smem, + const DataT* queries, + uint32_t query_id) { - using desc_type = cuvs::neighbors::cagra::detail:: + // CRITICAL: This function uses __syncthreads() and expects ALL threads to call it + // If only thread 0 calls it, __syncthreads() will hang forever + // Call the free function directly (not 
desc->setup_workspace() which uses a function pointer) + // The free function is in compute_distance_standard-impl.cuh + using desc_t = cuvs::neighbors::cagra::detail:: standard_dataset_descriptor_t; - - // Create a temporary descriptor on the stack - desc_type temp_desc(reinterpret_cast( - &cuvs::neighbors::cagra::detail::setup_workspace_standard), - reinterpret_cast( - &cuvs::neighbors::cagra::detail::compute_distance_standard), - dataset_ptr, - dataset_size, - dim, - ld, - dataset_norms); - - // Call the free function setup_workspace_standard which copies descriptor to smem - const desc_type* result = cuvs::neighbors::cagra::detail::setup_workspace_standard( - &temp_desc, smem, queries, query_id); - - // Return the smem_ws_ptr from the descriptor's args - return result->args.smem_ws_ptr; + const auto* result = + cuvs::neighbors::cagra::detail::setup_workspace_standard(desc, smem, queries, query_id); + return result; } -} // namespace cuvs::neighbors::cagra::detail::single_cta_search +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in index 6501272572..c81268e2c5 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in @@ -9,13 +9,12 @@ #include -namespace cuvs::neighbors::cagra::detail::single_cta_search { +namespace cuvs::neighbors::cagra::detail { // Instantiate the setup_workspace_vpq function for VPQ descriptor -template __device__ uint32_t setup_workspace_vpq<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( - void*, const @data_type@*, uint32_t, const uint8_t*, uint32_t, const @codebook_type@*, const @codebook_type@*, @index_type@, uint32_t); +template __device__ const 
cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>* setup_workspace_vpq<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); -} // namespace cuvs::neighbors::cagra::detail::single_cta_search +} // namespace cuvs::neighbors::cagra::detail #else @@ -29,7 +28,8 @@ __attribute__((__constructor__)) static void register_setup_workspace_vpq_@metri { registerAlgorithm( + tag_dist_@dist_abbrev@, + tag_codebook_half>( "setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", embedded_setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, sizeof(embedded_setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh index f0245030b2..e38f3ecbe8 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh @@ -12,9 +12,12 @@ #include #include -namespace cuvs::neighbors::cagra::detail::single_cta_search { +namespace cuvs::neighbors::cagra::detail { // Extern function implementation for setup_workspace_vpq (VPQ descriptor) +// Takes the concrete descriptor pointer and calls the free function directly (not through function +// pointer) For JIT LTO, the descriptor's 
setup_workspace_impl is nullptr, so we must call the free +// function directly template -__device__ uint32_t setup_workspace_vpq(void* smem, - const DataT* queries, - uint32_t query_id, - const uint8_t* encoded_dataset_ptr, - uint32_t encoded_dataset_dim, - const CodebookT* vq_code_book_ptr, - const CodebookT* pq_code_book_ptr, - IndexT dataset_size, - uint32_t dim) +__device__ const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t* +setup_workspace_vpq( + const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t* desc, + void* smem, + const DataT* queries, + uint32_t query_id) { - using desc_type = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; - - // Create a temporary descriptor on the stack - desc_type temp_desc(reinterpret_cast( - &cuvs::neighbors::cagra::detail::setup_workspace_vpq), - reinterpret_cast( - &cuvs::neighbors::cagra::detail::compute_distance_vpq), - encoded_dataset_ptr, - encoded_dataset_dim, - vq_code_book_ptr, - pq_code_book_ptr, - dataset_size, - dim); - - // Call the free function setup_workspace_vpq which copies descriptor to smem - const desc_type* result = cuvs::neighbors::cagra::detail::setup_workspace_vpq( - &temp_desc, smem, queries, query_id); - - // Return the smem_ws_ptr from the descriptor's args - return result->args.smem_ws_ptr; + // Call the free function directly (not desc->setup_workspace() which uses a function pointer) + // The free function is in compute_distance_vpq-impl.cuh + using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; + const auto* result = + cuvs::neighbors::cagra::detail::setup_workspace_vpq(desc, smem, queries, query_id); + return result; } -} // namespace cuvs::neighbors::cagra::detail::single_cta_search +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index c8b885dffe..7d643b29f5 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -14,6 +14,10 @@ #include "topk_for_cagra/topk.h" // TODO replace with raft topk if possible #include "utils.hpp" +#ifdef CUVS_ENABLE_JIT_LTO +#include "search_multi_cta_kernel_launcher_jit.cuh" +#endif + #include #include #include @@ -452,7 +456,18 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel( uint32_t k = j + (itopk_size * (cta_id + (num_cta_per_query * query_id))); result_indices_ptr[k] = index & ~index_msb_1_mask; if (result_distances_ptr != nullptr) { - result_distances_ptr[k] = result_distances_buffer[i]; + DISTANCE_T dist = result_distances_buffer[i]; + result_distances_ptr[k] = dist; + // Debug: print first query, first CTA, first few results + if (query_id == 0 && cta_id == 0 && j < 5) { + printf("NON-JIT: query=%u cta=%u j=%u i=%u idx=%u dist=%.6f\n", + query_id, + cta_id, + j, + i, + index & ~index_msb_1_mask, + (float)dist); + } } } else { // If it is valid and registered in the traversed hash table but is @@ -501,33 +516,7 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel( #endif } -template -RAFT_KERNEL set_value_batch_kernel(T* const dev_ptr, - const std::size_t ld, - const T val, - const std::size_t count, - const std::size_t batch_size) -{ - const auto tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= count * batch_size) { return; } - const auto batch_id = tid / count; - const auto elem_id = tid % count; - dev_ptr[elem_id + ld * batch_id] = val; -} - -template -void set_value_batch(T* const dev_ptr, - const std::size_t ld, - const T val, - const std::size_t count, - const std::size_t batch_size, - cudaStream_t cuda_stream) -{ - constexpr std::uint32_t block_size = 256; - const auto grid_size = (count * batch_size + block_size - 1) / block_size; - set_value_batch_kernel - <<>>(dev_ptr, ld, val, count, batch_size); -} +#include "set_value_batch.cuh" template struct search_kernel_config { @@ 
-573,6 +562,31 @@ void select_and_run(const dataset_descriptor_host& dat SampleFilterT sample_filter, cudaStream_t stream) { +#ifdef CUVS_ENABLE_JIT_LTO + // Use JIT version when JIT is enabled + select_and_run_jit(dataset_desc, + graph, + source_indices_ptr, + topk_indices_ptr, + topk_distances_ptr, + queries_ptr, + num_queries, + dev_seed_ptr, + num_executed_iterations, + ps, + topk, + block_size, + result_buffer_size, + smem_size, + visited_hash_bitlen, + traversed_hash_bitlen, + traversed_hashmap_ptr, + num_cta_per_query, + num_seeds, + sample_filter, + stream); +#else + // Non-JIT path auto kernel = search_kernel_config, SourceIndexT, @@ -628,6 +642,7 @@ void select_and_run(const dataset_descriptor_host& dat ps.max_iterations, num_executed_iterations, sample_filter); +#endif } } // namespace multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh new file mode 100644 index 0000000000..90c3a61be2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -0,0 +1,283 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#ifndef CUVS_ENABLE_JIT_LTO +#error "search_multi_cta_kernel_launcher_jit.cuh included but CUVS_ENABLE_JIT_LTO not defined!" 
+#endif + +// Include tags header before any other includes that might open namespaces +#include + +#include "compute_distance.hpp" // For dataset_descriptor_host +#include "jit_lto_kernels/search_multi_cta_planner.hpp" +#include "search_plan.cuh" // For search_params +#include "set_value_batch.cuh" // For set_value_batch +#include +#include +#include +#include +#include + +#include +#include +#include +// Note: We don't include search_multi_cta_kernel_jit.cuh here because: +// - The launcher doesn't need the kernel function definitions +// - The kernel is dispatched via the JIT LTO launcher system +// - Including it would pull in impl files that cause namespace issues + +namespace cuvs::neighbors::cagra::detail::multi_cta_search { + +// Helper functions to get tags for JIT LTO +namespace { +template +constexpr auto get_data_type_tag() +{ + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_f{}; } + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_h{}; } + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_sc{}; } + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_uc{}; } +} + +template +constexpr auto get_index_type_tag() +{ + if constexpr (std::is_same_v) { + return cuvs::neighbors::cagra::detail::tag_idx_ui{}; + } +} + +template +constexpr auto get_distance_type_tag() +{ + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_dist_f{}; } +} + +template +constexpr auto get_source_index_type_tag() +{ + if constexpr (std::is_same_v) { + return cuvs::neighbors::cagra::detail::tag_idx_ui{}; + } +} + +template +std::string get_sample_filter_name() +{ + if constexpr (std::is_same_v) { + return "filter_none"; + } else if constexpr ( + std::is_same_v> || + std::is_same_v>) { + return "filter_bitset"; + } else { + // Default to none filter for unknown types + return "filter_none"; + } +} +} // namespace + +// JIT version of select_and_run for multi_cta 
+template +void select_and_run_jit( + const dataset_descriptor_host& dataset_desc, + raft::device_matrix_view graph, + const SourceIndexT* source_indices_ptr, + IndexT* topk_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] + DistanceT* topk_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] + const DataT* queries_ptr, // [num_queries, dataset_dim] + uint32_t num_queries, + const IndexT* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* num_executed_iterations, // [num_queries,] + const search_params& ps, + uint32_t topk, + // multi_cta_search (params struct) + uint32_t block_size, // + uint32_t result_buffer_size, + uint32_t smem_size, + uint32_t visited_hash_bitlen, + int64_t traversed_hash_bitlen, + IndexT* traversed_hashmap_ptr, + uint32_t num_cta_per_query, + uint32_t num_seeds, + SampleFilterT sample_filter, + cudaStream_t stream) +{ + std::cerr << "[JIT] select_and_run_jit (multi_cta) called (num_queries=" << num_queries + << ", topk=" << topk << ", num_cta_per_query=" << num_cta_per_query << ")" << std::endl; + std::cerr.flush(); + + // Extract bitset data from filter object (if it's a bitset_filter) + uint32_t* bitset_ptr = nullptr; + SourceIndexT bitset_len = 0; + SourceIndexT original_nbits = 0; + + if constexpr (!std::is_same_v) { + // Try to extract bitset data from the filter + if constexpr (std::is_same_v< + SampleFilterT, + cuvs::neighbors::filtering::bitset_filter>) { + auto bitset_view = sample_filter.view(); + bitset_ptr = const_cast(bitset_view.data()); + bitset_len = static_cast(bitset_view.size()); + original_nbits = static_cast(bitset_view.get_original_nbits()); + } + } + + // Create planner with tags + using DataTag = decltype(get_data_type_tag()); + using IndexTag = decltype(get_index_type_tag()); + using DistTag = decltype(get_distance_type_tag()); + using SourceTag = decltype(get_source_index_type_tag()); + + std::cerr << "[JIT] Using JIT path for CAGRA multi_cta search" << std::endl; + std::cerr.flush(); + + // 
For multi_cta, we don't use topk_by_bitonic_sort or bitonic_sort_and_merge_multi_warps + // These are handled inside the kernel based on max_elements + // We need to construct the entrypoint name manually since it's different from single_cta + std::string metric_name_full; + if (dataset_desc.metric == cuvs::distance::DistanceType::L2Expanded) { + metric_name_full = "L2Expanded"; + } else if (dataset_desc.metric == cuvs::distance::DistanceType::InnerProduct) { + metric_name_full = "InnerProduct"; + } else if (dataset_desc.metric == cuvs::distance::DistanceType::CosineExpanded) { + metric_name_full = "CosineExpanded"; + } else { + RAFT_FAIL("Unsupported metric for multi_cta JIT kernel"); + } + + // Debug: Check if this is VPQ + std::cerr << "[JIT] Dataset descriptor - is_vpq: " << dataset_desc.is_vpq + << ", pq_bits: " << dataset_desc.pq_bits << ", pq_len: " << dataset_desc.pq_len + << std::endl; + std::cerr.flush(); + + // Create planner and register device functions + // Pass team_size, dataset_block_dim, and VPQ parameters to match the kernel entrypoint name + CagraMultiCtaSearchPlanner planner( + dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_sample_filter_device_function(get_sample_filter_name()); + + // Get launcher using the planner's entrypoint name and fragment key + auto params = make_fragment_key(); + auto launcher = planner.get_launcher(); + + uint32_t max_elements{}; + if (result_buffer_size <= 64) { + max_elements = 64; + } else if 
(result_buffer_size <= 128) { + max_elements = 128; + } else if (result_buffer_size <= 256) { + max_elements = 256; + } else { + THROW("Result buffer size %u larger than max buffer size %u", result_buffer_size, 256); + } + + RAFT_CUDA_TRY(cudaFuncSetAttribute( + launcher->get_kernel(), cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + + // Initialize hash table + const uint32_t traversed_hash_size = hashmap::get_size(traversed_hash_bitlen); + set_value_batch(traversed_hashmap_ptr, + traversed_hash_size, + ~static_cast(0), + traversed_hash_size, + num_queries, + stream); + + dim3 block_dims(block_size, 1, 1); + dim3 grid_dims(num_cta_per_query, num_queries, 1); + RAFT_LOG_DEBUG("Launching JIT multi_cta kernel with %u threads, (%u, %u) blocks %u smem", + block_size, + num_cta_per_query, + num_queries, + smem_size); + + // Get the device descriptor pointer + // dev_ptr() returns const dataset_descriptor_base_t*, but kernel expects const desc_t* + // Since base class is at offset 0, pointer value is the same and kernel can safely cast + const auto* dev_desc = dataset_desc.dev_ptr(stream); + + // Dispatch kernel via launcher + launcher->dispatch(stream, + grid_dims, + block_dims, + smem_size, + topk_indices_ptr, + topk_distances_ptr, + dev_desc, + queries_ptr, + graph.data_handle(), + max_elements, + graph.extent(1), + source_indices_ptr, + ps.num_random_samplings, + ps.rand_xor_mask, + dev_seed_ptr, + num_seeds, + visited_hash_bitlen, + traversed_hashmap_ptr, + traversed_hash_bitlen, + ps.itopk_size, + ps.min_iterations, + ps.max_iterations, + num_executed_iterations, + bitset_ptr, + bitset_len, + original_nbits); + + // Check for errors immediately after launch + cudaError_t err = cudaPeekAtLastError(); + if (err != cudaSuccess) { + std::cerr << "[JIT] ERROR after kernel launch (peek): " << cudaGetErrorString(err) << " (" + << err << ")" << std::endl; + std::cerr.flush(); + } else { + std::cerr << "[JIT] No error after kernel launch (peek)" << std::endl; + 
std::cerr.flush(); + } + RAFT_CUDA_TRY(err); + + // Synchronize and check again - this will catch kernel execution errors + std::cerr << "[JIT] Synchronizing stream to check for kernel execution errors..." << std::endl; + std::cerr.flush(); + err = cudaStreamSynchronize(stream); + if (err != cudaSuccess) { + std::cerr << "[JIT] ERROR after kernel sync: " << cudaGetErrorString(err) << " (" << err << ")" + << std::endl; + std::cerr.flush(); + } else { + std::cerr << "[JIT] Stream synchronized successfully - kernel completed" << std::endl; + std::cerr.flush(); + } + RAFT_CUDA_TRY(err); +} + +} // namespace cuvs::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index f7d353d864..f66fe203c4 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -1,9 +1,16 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once +// Include tags header before any namespace declarations to avoid issues when it's included inside +// functions +#ifdef CUVS_ENABLE_JIT_LTO +#include "search_multi_kernel_launcher_jit.cuh" +#include +#endif + #include "compute_distance-ext.cuh" #include "device_common.hpp" #include "hashmap.hpp" @@ -168,24 +175,47 @@ void random_pickup(const dataset_descriptor_host& data std::uint32_t hash_bitlen, cudaStream_t cuda_stream) { - const auto block_size = 256u; - const auto num_teams_per_threadblock = block_size / dataset_desc.team_size; - const dim3 grid_size((num_pickup + num_teams_per_threadblock - 1) / num_teams_per_threadblock, - num_queries); - - random_pickup_kernel<<>>( - dataset_desc.dev_ptr(cuda_stream), - queries_ptr, - num_pickup, - num_distilation, - rand_xor_mask, - seed_ptr, - num_seeds, - result_indices_ptr, - result_distances_ptr, - ldr, - visited_hashmap_ptr, - hash_bitlen); +#ifdef CUVS_ENABLE_JIT_LTO + // Use JIT version when JIT is enabled + random_pickup_jit(dataset_desc, + queries_ptr, + num_queries, + num_pickup, + num_distilation, + rand_xor_mask, + seed_ptr, + num_seeds, + result_indices_ptr, + result_distances_ptr, + ldr, + visited_hashmap_ptr, + hash_bitlen, + cuda_stream); +#else + // Non-JIT path + { + const auto block_size = 256u; + const auto num_teams_per_threadblock = block_size / dataset_desc.team_size; + const dim3 grid_size((num_pickup + num_teams_per_threadblock - 1) / num_teams_per_threadblock, + num_queries); + + random_pickup_kernel<<>>(dataset_desc.dev_ptr(cuda_stream), + queries_ptr, + num_pickup, + num_distilation, + rand_xor_mask, + seed_ptr, + num_seeds, + result_indices_ptr, + result_distances_ptr, + ldr, + visited_hashmap_ptr, + hash_bitlen); + } +#endif } template @@ -402,30 +432,55 @@ void compute_distance_to_child_nodes( SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream) { - const auto block_size = 128; - const auto teams_per_block = block_size / 
dataset_desc.team_size; - const dim3 grid_size((search_width * graph_degree + teams_per_block - 1) / teams_per_block, - num_queries); - - compute_distance_to_child_nodes_kernel<<>>(parent_node_list, - parent_candidates_ptr, - parent_distance_ptr, - lds, - search_width, - dataset_desc.dev_ptr(cuda_stream), - neighbor_graph_ptr, - graph_degree, - source_indices_ptr, - query_ptr, - visited_hashmap_ptr, - hash_bitlen, - result_indices_ptr, - result_distances_ptr, - ldd, - sample_filter); +#ifdef CUVS_ENABLE_JIT_LTO + // Use JIT version when JIT is enabled + compute_distance_to_child_nodes_jit(parent_node_list, + parent_candidates_ptr, + parent_distance_ptr, + lds, + search_width, + dataset_desc, + neighbor_graph_ptr, + graph_degree, + source_indices_ptr, + query_ptr, + num_queries, + visited_hashmap_ptr, + hash_bitlen, + result_indices_ptr, + result_distances_ptr, + ldd, + sample_filter, + cuda_stream); +#else + // Non-JIT path + { + const auto block_size = 128; + const auto teams_per_block = block_size / dataset_desc.team_size; + const dim3 grid_size((search_width * graph_degree + teams_per_block - 1) / teams_per_block, + num_queries); + + compute_distance_to_child_nodes_kernel<<>>(parent_node_list, + parent_candidates_ptr, + parent_distance_ptr, + lds, + search_width, + dataset_desc.dev_ptr(cuda_stream), + neighbor_graph_ptr, + graph_degree, + source_indices_ptr, + query_ptr, + visited_hashmap_ptr, + hash_bitlen, + result_indices_ptr, + result_distances_ptr, + ldd, + sample_filter); + } +#endif } template @@ -497,17 +552,33 @@ void apply_filter(const SourceIndexT* source_indices_ptr, SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream) { - const std::uint32_t block_size = 256; - const std::uint32_t grid_size = raft::ceildiv(num_queries * result_buffer_size, block_size); - - apply_filter_kernel<<>>(source_indices_ptr, - result_indices_ptr, - result_distances_ptr, - lds, - result_buffer_size, - num_queries, - query_id_offset, - sample_filter); +#ifdef 
CUVS_ENABLE_JIT_LTO + // Use JIT version when JIT is enabled + apply_filter_jit(source_indices_ptr, + result_indices_ptr, + result_distances_ptr, + lds, + result_buffer_size, + num_queries, + query_id_offset, + sample_filter, + cuda_stream); +#else + // Non-JIT path + { + const std::uint32_t block_size = 256; + const std::uint32_t grid_size = raft::ceildiv(num_queries * result_buffer_size, block_size); + + apply_filter_kernel<<>>(source_indices_ptr, + result_indices_ptr, + result_distances_ptr, + lds, + result_buffer_size, + num_queries, + query_id_offset, + sample_filter); + } +#endif } template diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh new file mode 100644 index 0000000000..1b156c5aef --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -0,0 +1,323 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#ifndef CUVS_ENABLE_JIT_LTO +#error "search_multi_kernel_launcher_jit.cuh included but CUVS_ENABLE_JIT_LTO not defined!" 
+#endif + +// Tags header should be included before this header (at file scope, not inside functions) +// to avoid namespace definition errors when this header is included inside function bodies + +#include "compute_distance.hpp" // For dataset_descriptor_host +#include "jit_lto_kernels/search_multi_kernel_planner.hpp" +#include "search_plan.cuh" // For search_params +#include +#include +#include +#include +#include + +#include +#include +#include +// Note: We don't include search_multi_kernel_jit.cuh here because: +// - The launcher doesn't need the kernel function definitions +// - The kernel is dispatched via the JIT LTO launcher system +// - Including it would pull in impl files that cause namespace issues + +namespace cuvs::neighbors::cagra::detail::multi_kernel_search { + +// Helper functions to get tags for JIT LTO +namespace { +template +constexpr auto get_data_type_tag() +{ + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_f{}; } + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_h{}; } + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_sc{}; } + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_uc{}; } +} + +template +constexpr auto get_index_type_tag() +{ + if constexpr (std::is_same_v) { + return cuvs::neighbors::cagra::detail::tag_idx_ui{}; + } +} + +template +constexpr auto get_distance_type_tag() +{ + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_dist_f{}; } +} + +template +constexpr auto get_source_index_type_tag() +{ + if constexpr (std::is_same_v) { + return cuvs::neighbors::cagra::detail::tag_idx_ui{}; + } +} + +template +std::string get_sample_filter_name() +{ + if constexpr (std::is_same_v) { + return "filter_none"; + } else if constexpr ( + std::is_same_v> || + std::is_same_v>) { + return "filter_bitset"; + } else { + // Default to none filter for unknown types + return "filter_none"; + } +} +} // namespace + +// 
JIT version of random_pickup +template +void random_pickup_jit(const dataset_descriptor_host& dataset_desc, + const DataT* queries_ptr, // [num_queries, dataset_dim] + std::size_t num_queries, + std::size_t num_pickup, + unsigned num_distilation, + uint64_t rand_xor_mask, + const IndexT* seed_ptr, // [num_queries, num_seeds] + uint32_t num_seeds, + IndexT* result_indices_ptr, // [num_queries, ldr] + DistanceT* result_distances_ptr, // [num_queries, ldr] + std::size_t ldr, // (*) ldr >= num_pickup + IndexT* visited_hashmap_ptr, // [num_queries, 1 << bitlen] + std::uint32_t hash_bitlen, + cudaStream_t cuda_stream) +{ + // Create planner with tags + using DataTag = decltype(get_data_type_tag()); + using IndexTag = decltype(get_index_type_tag()); + using DistTag = decltype(get_distance_type_tag()); + using SourceTag = decltype(get_source_index_type_tag()); // Use IndexT for source + + // Create planner and register device functions + CagraMultiKernelSearchPlanner planner( + dataset_desc.metric, + "random_pickup_kernel", + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + auto launcher = planner.get_launcher(); + + const auto block_size = 256u; + const auto num_teams_per_threadblock = block_size / dataset_desc.team_size; + const dim3 grid_size((num_pickup + num_teams_per_threadblock - 1) / num_teams_per_threadblock, + num_queries); + + // Get the device descriptor pointer + const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); + + // Dispatch kernel via launcher + launcher->dispatch(cuda_stream, + 
grid_size, + dim3(block_size, 1, 1), + dataset_desc.smem_ws_size_in_bytes, + dev_desc, + queries_ptr, + num_pickup, + num_distilation, + rand_xor_mask, + seed_ptr, + num_seeds, + result_indices_ptr, + result_distances_ptr, + ldr, + visited_hashmap_ptr, + hash_bitlen); + + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +// JIT version of compute_distance_to_child_nodes +template +void compute_distance_to_child_nodes_jit( + const IndexT* parent_node_list, // [num_queries, search_width] + IndexT* const parent_candidates_ptr, // [num_queries, search_width] + DistanceT* const parent_distance_ptr, // [num_queries, search_width] + std::size_t lds, + uint32_t search_width, + const dataset_descriptor_host& dataset_desc, + const IndexT* neighbor_graph_ptr, // [dataset_size, graph_degree] + std::uint32_t graph_degree, + const SourceIndexT* source_indices_ptr, + const DataT* query_ptr, // [num_queries, data_dim] + std::uint32_t num_queries, + IndexT* visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + std::uint32_t hash_bitlen, + IndexT* result_indices_ptr, // [num_queries, ldd] + DistanceT* result_distances_ptr, // [num_queries, ldd] + std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + SAMPLE_FILTER_T sample_filter, + cudaStream_t cuda_stream) +{ + // Create planner with tags + using DataTag = decltype(get_data_type_tag()); + using IndexTag = decltype(get_index_type_tag()); + using DistTag = decltype(get_distance_type_tag()); + using SourceTag = decltype(get_source_index_type_tag()); + + // Create planner and register device functions + CagraMultiKernelSearchPlanner planner( + dataset_desc.metric, + "compute_distance_to_child_nodes_kernel", + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + 
dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + + auto launcher = planner.get_launcher(); + + const auto block_size = 128; + const auto teams_per_block = block_size / dataset_desc.team_size; + const dim3 grid_size((search_width * graph_degree + teams_per_block - 1) / teams_per_block, + num_queries); + + // Get the device descriptor pointer + const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); + + // Dispatch kernel via launcher + launcher->dispatch(cuda_stream, + grid_size, + dim3(block_size, 1, 1), + dataset_desc.smem_ws_size_in_bytes, + parent_node_list, + parent_candidates_ptr, + parent_distance_ptr, + lds, + search_width, + dev_desc, + neighbor_graph_ptr, + graph_degree, + source_indices_ptr, + query_ptr, + visited_hashmap_ptr, + hash_bitlen, + result_indices_ptr, + result_distances_ptr, + ldd, + sample_filter); + + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +// JIT version of apply_filter +template +void apply_filter_jit(const SourceIndexT* source_indices_ptr, + INDEX_T* const result_indices_ptr, + DISTANCE_T* const result_distances_ptr, + const std::size_t lds, + const std::uint32_t result_buffer_size, + const std::uint32_t num_queries, + const INDEX_T query_id_offset, + SAMPLE_FILTER_T sample_filter, + cudaStream_t cuda_stream) +{ + // Extract bitset data from filter object (if it's a bitset_filter) + uint32_t* bitset_ptr = nullptr; + SourceIndexT bitset_len = 0; + SourceIndexT original_nbits = 0; + + if constexpr (!std::is_same_v) { + // Try to extract bitset data from the filter + // bitset_filter has a view() method that returns the bitset_view + if constexpr (std::is_same_v< + SAMPLE_FILTER_T, + cuvs::neighbors::filtering::bitset_filter>) { + auto bitset_view = sample_filter.view(); + bitset_ptr = const_cast(bitset_view.data()); + bitset_len = static_cast(bitset_view.size()); 
+ original_nbits = static_cast(bitset_view.get_original_nbits()); + } + } + + // Create planner with tags + using DataTag = + decltype(get_data_type_tag()); // Not used for apply_filter, but required by planner + using IndexTag = decltype(get_index_type_tag()); + using DistTag = decltype(get_distance_type_tag()); + using SourceTag = decltype(get_source_index_type_tag()); + + // Create planner - apply_filter doesn't use dataset_descriptor, so we use dummy values + // The kernel name is "apply_filter_kernel" and build_entrypoint_name will handle it specially + CagraMultiKernelSearchPlanner planner( + cuvs::distance::DistanceType::L2Expanded, + "apply_filter_kernel", + 8, + 128, + false, + 0, + 0); // Dummy values, not used by apply_filter + + // Add sample filter device function - determine filter type from template parameter + planner.add_sample_filter_device_function(get_sample_filter_name()); + + auto launcher = planner.get_launcher(); + + const std::uint32_t block_size = 256; + const std::uint32_t grid_size = raft::ceildiv(num_queries * result_buffer_size, block_size); + + // Dispatch kernel via launcher with bitset parameters + launcher->dispatch(cuda_stream, + dim3(grid_size, 1, 1), + dim3(block_size, 1, 1), + 0, // No shared memory needed + source_indices_ptr, + result_indices_ptr, + result_distances_ptr, + lds, + result_buffer_size, + num_queries, + query_id_offset, + bitset_ptr, + bitset_len, + original_nbits); + + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +} // namespace cuvs::neighbors::cagra::detail::multi_kernel_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh index 0fdf0f208b..7a6a12b67e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh @@ -34,6 +34,7 @@ #include #include +// All includes are done before opening namespace to avoid nested namespace issues namespace cuvs::neighbors::cagra::detail { 
namespace single_cta_search { diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_inst.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_inst.cuh index 11b468cfca..d242e13b95 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_inst.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_inst.cuh @@ -1,13 +1,16 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once -#include "search_single_cta_kernel-inl.cuh" #include +// Include explicit instantiations before namespace (launcher includes JIT LTO headers with +// namespace definitions) +#include "search_single_cta_kernel_explicit_inst.cuh" + namespace cuvs::neighbors::cagra::detail::single_cta_search { #define instantiate_kernel_selection(DataT, IndexT, DistanceT, SampleFilterT) \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 404817e582..87da77456b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -2214,136 +2214,5 @@ auto get_runner(Args... 
args) -> std::shared_ptr weak = runner; return runner; } - -template -void select_and_run( - const dataset_descriptor_host& dataset_desc, - raft::device_matrix_view graph, - std::optional> source_indices, - uintptr_t topk_indices_ptr, // [num_queries, topk] - DistanceT* topk_distances_ptr, // [num_queries, topk] - const DataT* queries_ptr, // [num_queries, dataset_dim] - uint32_t num_queries, - const IndexT* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* num_executed_iterations, // [num_queries,] - const search_params& ps, - uint32_t topk, - uint32_t num_itopk_candidates, - uint32_t block_size, // - uint32_t smem_size, - int64_t hash_bitlen, - IndexT* hashmap_ptr, - size_t small_hash_bitlen, - size_t small_hash_reset_interval, - uint32_t num_seeds, - SampleFilterT sample_filter, - cudaStream_t stream) -{ - const SourceIndexT* source_indices_ptr = - source_indices.has_value() ? source_indices->data_handle() : nullptr; - - uint32_t max_candidates{}; - if (num_itopk_candidates <= 64) { - max_candidates = 64; - } else if (num_itopk_candidates <= 128) { - max_candidates = 128; - } else if (num_itopk_candidates <= 256) { - max_candidates = 256; - } else { - max_candidates = - 32; // irrelevant, radix based topk is used (see choose_itopk_and_max_candidates) - } - - uint32_t max_itopk{}; - assert(ps.itopk_size <= 512); - if (num_itopk_candidates <= 256) { // bitonic sort - if (ps.itopk_size <= 64) { - max_itopk = 64; - } else if (ps.itopk_size <= 128) { - max_itopk = 128; - } else if (ps.itopk_size <= 256) { - max_itopk = 256; - } else { - max_itopk = 512; - } - } else { // radix sort - if (ps.itopk_size <= 256) { - max_itopk = 256; - } else { - max_itopk = 512; - } - } - - if (ps.persistent) { - using runner_type = persistent_runner_t; - - get_runner(/* -Note, we're passing the descriptor by reference here, and this reference is going to be passed to a -new spawned thread, which is dangerous. 
However, the descriptor is copied in that thread before the -control is returned in this thread (in persistent_runner_t constructor), so we're safe. -*/ - std::cref(dataset_desc), - graph, - source_indices_ptr, - num_itopk_candidates, - block_size, - smem_size, - hash_bitlen, - small_hash_bitlen, - small_hash_reset_interval, - ps.num_random_samplings, - ps.rand_xor_mask, - num_seeds, - max_candidates, - max_itopk, - ps.itopk_size, - ps.search_width, - ps.min_iterations, - ps.max_iterations, - sample_filter, - ps.persistent_lifetime, - ps.persistent_device_usage) - ->launch(topk_indices_ptr, topk_distances_ptr, queries_ptr, num_queries, topk); - } else { - using descriptor_base_type = dataset_descriptor_base_t; - auto kernel = search_kernel_config:: - choose_itopk_and_mx_candidates(ps.itopk_size, num_itopk_candidates, block_size); - RAFT_CUDA_TRY( - cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); - dim3 thread_dims(block_size, 1, 1); - dim3 block_dims(1, num_queries, 1); - RAFT_LOG_DEBUG( - "Launching kernel with %u threads, %u block %u smem", block_size, num_queries, smem_size); - kernel<<>>(topk_indices_ptr, - topk_distances_ptr, - topk, - dataset_desc.dev_ptr(stream), - queries_ptr, - graph.data_handle(), - graph.extent(1), - source_indices_ptr, - ps.num_random_samplings, - ps.rand_xor_mask, - dev_seed_ptr, - num_seeds, - hashmap_ptr, - max_candidates, - max_itopk, - ps.itopk_size, - ps.search_width, - ps.min_iterations, - ps.max_iterations, - num_executed_iterations, - hash_bitlen, - small_hash_bitlen, - small_hash_reset_interval, - sample_filter); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - } -} } // namespace single_cta_search } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_explicit_inst.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_explicit_inst.cuh index e93d24aaf6..8f715bbbc4 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_explicit_inst.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_explicit_inst.cuh @@ -6,7 +6,7 @@ #pragma once #ifdef CUVS_ENABLE_JIT_LTO -#include "search_single_cta_kernel_jit.cuh" +#include "search_single_cta_kernel_launcher_jit.cuh" #else -#include "search_single_cta_kernel-inl.cuh" +#include "search_single_cta_kernel_launcher.cuh" #endif diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit-inl.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit-inl.cuh deleted file mode 100644 index 56201c2671..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit-inl.cuh +++ /dev/null @@ -1,863 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ -#pragma once - -#include "search_single_cta_kernel_jit.cuh" - -#include "bitonic.hpp" -#include "device_common.hpp" -#include "hashmap.hpp" -#include "search_plan.cuh" -#include "topk_by_radix.cuh" -#include "topk_for_cagra/topk.h" -#include "utils.hpp" - -#include -#include -#include -#include -#include -#include - -#include - -#include "../ann_utils.cuh" - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace cuvs::neighbors::cagra::detail::single_cta_search { - -// JIT version of compute_distance_to_random_nodes - uses extern compute_distance -template -RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( - IndexT* __restrict__ result_indices_ptr, - DistanceT* __restrict__ result_distances_ptr, - const DataT* dataset_ptr, - const uint8_t* encoded_dataset_ptr, - uint32_t smem_ws_ptr, - IndexT dataset_size, - uint32_t dim, - uint32_t encoded_dataset_dim, - uint32_t ld, - uint32_t team_size_bitshift, - const DistanceT* dataset_norms, - 
const CodebookT* vq_code_book_ptr, - const CodebookT* pq_code_book_ptr, - const uint32_t num_pickup, - const uint32_t num_distilation, - const uint64_t rand_xor_mask, - const IndexT* __restrict__ seed_ptr, - const uint32_t num_seeds, - IndexT* __restrict__ visited_hash_ptr, - const uint32_t visited_hash_bitlen, - IndexT* __restrict__ traversed_hash_ptr, - const uint32_t traversed_hash_bitlen, - const uint32_t block_id = 0, - const uint32_t num_blocks = 1) -{ - constexpr unsigned warp_size = 32; - const auto max_i = raft::round_up_safe(num_pickup, warp_size >> team_size_bitshift); - - for (uint32_t i = threadIdx.x >> team_size_bitshift; i < max_i; - i += (blockDim.x >> team_size_bitshift)) { - const bool valid_i = (i < num_pickup); - - IndexT best_index_team_local = raft::upper_bound(); - DistanceT best_norm2_team_local = raft::upper_bound(); - for (uint32_t j = 0; j < num_distilation; j++) { - IndexT seed_index = 0; - if (valid_i) { - uint32_t gid = block_id + (num_blocks * (i + (num_pickup * j))); - if (seed_ptr && (gid < num_seeds)) { - seed_index = seed_ptr[gid]; - } else { - seed_index = device::xorshift64(gid ^ rand_xor_mask) % dataset_size; - } - } - - DistanceT norm2 = 0; - if constexpr (DescType == DescriptorType::Standard) { - norm2 = - valid_i - ? compute_distance_standard( - dataset_ptr, smem_ws_ptr, seed_index, dim, ld, team_size_bitshift, dataset_norms) - : 0; - } else if constexpr (DescType == DescriptorType::VPQ) { - norm2 = valid_i ? 
compute_distance_vpq(encoded_dataset_ptr, - smem_ws_ptr, - seed_index, - encoded_dataset_dim, - vq_code_book_ptr, - pq_code_book_ptr, - team_size_bitshift) - : 0; - } - const auto norm2_sum = device::team_sum(norm2, team_size_bitshift); - - if (valid_i && (norm2_sum < best_norm2_team_local)) { - best_norm2_team_local = norm2_sum; - best_index_team_local = seed_index; - } - } - - const unsigned lane_id = threadIdx.x & ((1u << team_size_bitshift) - 1u); - if (valid_i && lane_id == 0) { - if (best_index_team_local != raft::upper_bound()) { - if (hashmap::insert(visited_hash_ptr, visited_hash_bitlen, best_index_team_local) == 0) { - best_norm2_team_local = raft::upper_bound(); - best_index_team_local = raft::upper_bound(); - } else if ((traversed_hash_ptr != nullptr) && - hashmap::search( - traversed_hash_ptr, traversed_hash_bitlen, best_index_team_local)) { - best_norm2_team_local = raft::upper_bound(); - best_index_team_local = raft::upper_bound(); - } - } - result_distances_ptr[i] = best_norm2_team_local; - result_indices_ptr[i] = best_index_team_local; - } - } -} - -// JIT version of compute_distance_to_child_nodes - uses extern compute_distance -template -RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( - IndexT* __restrict__ result_child_indices_ptr, - DistanceT* __restrict__ result_child_distances_ptr, - const DataT* dataset_ptr, - const uint8_t* encoded_dataset_ptr, - uint32_t smem_ws_ptr, - uint32_t dim, - uint32_t encoded_dataset_dim, - uint32_t ld, - uint32_t team_size_bitshift, - const DistanceT* dataset_norms, - const CodebookT* vq_code_book_ptr, - const CodebookT* pq_code_book_ptr, - const IndexT* __restrict__ knn_graph, - const uint32_t knn_k, - IndexT* __restrict__ visited_hashmap_ptr, - const uint32_t visited_hash_bitlen, - IndexT* __restrict__ traversed_hashmap_ptr, - const uint32_t traversed_hash_bitlen, - const IndexT* __restrict__ parent_indices, - const IndexT* __restrict__ internal_topk_list, - const uint32_t search_width, - 
int* __restrict__ result_position = nullptr, - const int max_result_position = 0) -{ - constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask::value; - constexpr IndexT invalid_index = ~static_cast(0); - - // Read child indices of parents from knn graph and check if the distance computation is - // necessary. - for (uint32_t i = threadIdx.x; i < knn_k * search_width; i += blockDim.x) { - const IndexT smem_parent_id = parent_indices[i / knn_k]; - IndexT child_id = invalid_index; - if (smem_parent_id != invalid_index) { - const auto parent_id = internal_topk_list[smem_parent_id] & ~index_msb_1_mask; - child_id = knn_graph[(i % knn_k) + (static_cast(knn_k) * parent_id)]; - } - if (child_id != invalid_index) { - if (hashmap::insert(visited_hashmap_ptr, visited_hash_bitlen, child_id) == 0) { - child_id = invalid_index; - } else if ((traversed_hashmap_ptr != nullptr) && - hashmap::search( - traversed_hashmap_ptr, traversed_hash_bitlen, child_id)) { - child_id = invalid_index; - } - } - if (STATIC_RESULT_POSITION) { - result_child_indices_ptr[i] = child_id; - } else if (child_id != invalid_index) { - int j = atomicSub(result_position, 1) - 1; - result_child_indices_ptr[j] = child_id; - } - } - __syncthreads(); - - // Compute the distance to child nodes using extern compute_distance - constexpr unsigned warp_size = 32; - const auto num_k = knn_k * search_width; - const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bitshift); - const bool lead_lane = (threadIdx.x & ((1u << team_size_bitshift) - 1u)) == 0; - const uint32_t ofst = STATIC_RESULT_POSITION ? 0 : result_position[0]; - for (uint32_t i = threadIdx.x >> team_size_bitshift; i < max_i; - i += blockDim.x >> team_size_bitshift) { - const auto j = i + ofst; - const bool valid_i = STATIC_RESULT_POSITION ? (j < num_k) : (j < max_result_position); - const auto child_id = valid_i ? 
result_child_indices_ptr[j] : invalid_index; - - DistanceT child_dist = 0; - if constexpr (DescType == DescriptorType::Standard) { - child_dist = device::team_sum( - (child_id != invalid_index) - ? compute_distance_standard( - dataset_ptr, smem_ws_ptr, child_id, dim, ld, team_size_bitshift, dataset_norms) - : (lead_lane ? raft::upper_bound() : 0), - team_size_bitshift); - } else if constexpr (DescType == DescriptorType::VPQ) { - child_dist = device::team_sum((child_id != invalid_index) - ? compute_distance_vpq(encoded_dataset_ptr, - smem_ws_ptr, - child_id, - encoded_dataset_dim, - vq_code_book_ptr, - pq_code_book_ptr, - team_size_bitshift) - : (lead_lane ? raft::upper_bound() : 0), - team_size_bitshift); - } - __syncwarp(); - - // Store the distance - if (valid_i && lead_lane) { result_child_distances_ptr[j] = child_dist; } - } -} - -// JIT version of search_core - uses extern functions instead of templated descriptor -template -RAFT_DEVICE_INLINE_FUNCTION void search_core(uintptr_t result_indices_ptr, - DistanceT* const result_distances_ptr, - const std::uint32_t top_k, - const DataT* const queries_ptr, - const IndexT* const knn_graph, - const std::uint32_t graph_degree, - const SourceIndexT* source_indices_ptr, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const IndexT* seed_ptr, - const uint32_t num_seeds, - IndexT* const visited_hashmap_ptr, - const std::uint32_t max_candidates, - const std::uint32_t max_itopk, - const std::uint32_t internal_topk, - const std::uint32_t search_width, - const std::uint32_t min_iteration, - const std::uint32_t max_iteration, - std::uint32_t* const num_executed_iterations, - const std::uint32_t hash_bitlen, - const std::uint32_t small_hash_bitlen, - const std::uint32_t small_hash_reset_interval, - const std::uint32_t query_id, - const DataT* dataset_ptr, - const uint8_t* encoded_dataset_ptr, - IndexT dataset_size, - uint32_t dim, - uint32_t encoded_dataset_dim, - uint32_t ld, - const DistanceT* dataset_norms, 
- const CodebookT* vq_code_book_ptr, - const CodebookT* pq_code_book_ptr) -{ - using LOAD_T = device::LOAD_128BIT_T; - - auto to_source_index = [source_indices_ptr](IndexT x) { - return source_indices_ptr == nullptr ? static_cast(x) : source_indices_ptr[x]; - }; - -#ifdef _CLK_BREAKDOWN - std::uint64_t clk_init = 0; - std::uint64_t clk_compute_1st_distance = 0; - std::uint64_t clk_topk = 0; - std::uint64_t clk_reset_hash = 0; - std::uint64_t clk_pickup_parents = 0; - std::uint64_t clk_restore_hash = 0; - std::uint64_t clk_compute_distance = 0; - std::uint64_t clk_start; -#define _CLK_START() clk_start = clock64() -#define _CLK_REC(V) V += clock64() - clk_start; -#else -#define _CLK_START() -#define _CLK_REC(V) -#endif - _CLK_START(); - - extern __shared__ uint8_t smem[]; - - // Layout of result_buffer - const auto result_buffer_size = internal_topk + (search_width * graph_degree); - const auto result_buffer_size_32 = raft::round_up_safe(result_buffer_size, 32); - const auto small_hash_size = hashmap::get_size(small_hash_bitlen); - - // Compute smem_ws_size_in_bytes based on descriptor type - uint32_t smem_ws_size_in_bytes = 0; - if constexpr (DescType == DescriptorType::Standard) { - using desc_type = cuvs::neighbors::cagra::detail:: - standard_dataset_descriptor_t; - using QUERY_T = typename desc_type::QUERY_T; - smem_ws_size_in_bytes = - sizeof(desc_type) + raft::round_up_safe(dim, DatasetBlockDim) * sizeof(QUERY_T); - } else if constexpr (DescType == DescriptorType::VPQ) { - using desc_type = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; - using QUERY_T = typename desc_type::QUERY_T; - constexpr uint32_t kSMemCodeBookSizeInBytes = (1 << PQ_BITS) * PQ_LEN * sizeof(CodebookT); - smem_ws_size_in_bytes = sizeof(desc_type) + kSMemCodeBookSizeInBytes + - raft::round_up_safe(dim, DatasetBlockDim) * sizeof(QUERY_T); - } - - // Set smem working buffer for the distance calculation using extern function - uint32_t smem_ws_ptr = 0; - if constexpr (DescType 
== DescriptorType::Standard) { - smem_ws_ptr = - setup_workspace_standard( - smem, queries_ptr, query_id, dataset_ptr, dataset_size, dim, ld, dataset_norms); - } else if constexpr (DescType == DescriptorType::VPQ) { - smem_ws_ptr = setup_workspace_vpq(smem, - queries_ptr, - query_id, - encoded_dataset_ptr, - encoded_dataset_dim, - vq_code_book_ptr, - pq_code_book_ptr, - dataset_size, - dim); - } - - auto* __restrict__ result_indices_buffer = - reinterpret_cast(smem + smem_ws_size_in_bytes); - auto* __restrict__ result_distances_buffer = - reinterpret_cast(result_indices_buffer + result_buffer_size_32); - auto* __restrict__ visited_hash_buffer = - reinterpret_cast(result_distances_buffer + result_buffer_size_32); - auto* __restrict__ parent_list_buffer = - reinterpret_cast(visited_hash_buffer + small_hash_size); - auto* __restrict__ topk_ws = reinterpret_cast(parent_list_buffer + search_width); - auto* terminate_flag = reinterpret_cast(topk_ws + 3); - auto* __restrict__ smem_work_ptr = reinterpret_cast(terminate_flag + 1); - - // A flag for filtering. - auto filter_flag = terminate_flag; - - if (threadIdx.x == 0) { - terminate_flag[0] = 0; - topk_ws[0] = ~0u; - } - - // Init hashmap - IndexT* local_visited_hashmap_ptr; - if (small_hash_bitlen) { - local_visited_hashmap_ptr = visited_hash_buffer; - } else { - local_visited_hashmap_ptr = visited_hashmap_ptr + (hashmap::get_size(hash_bitlen) * blockIdx.y); - } - hashmap::init(local_visited_hashmap_ptr, hash_bitlen, 0); - __syncthreads(); - _CLK_REC(clk_init); - - // compute distance to randomly selecting nodes using JIT version - _CLK_START(); - const IndexT* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; - constexpr uint32_t team_size_bits = raft::Pow2::Log2; - compute_distance_to_random_nodes_jit(result_indices_buffer, - result_distances_buffer, - dataset_ptr, - encoded_dataset_ptr, - smem_ws_ptr, - dataset_size, - dim, - encoded_dataset_dim, - ld, - team_size_bits, - dataset_norms, - vq_code_book_ptr, - pq_code_book_ptr, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen, - (IndexT*)nullptr, - 0); - __syncthreads(); - _CLK_REC(clk_compute_1st_distance); - - std::uint32_t iter = 0; - while (1) { - // sort - if constexpr (TOPK_BY_BITONIC_SORT) { - assert(blockDim.x >= 64); - const bool bitonic_sort_and_full_multi_warps = (max_candidates > 128) ? true : false; - - // reset small-hash table. - if ((iter + 1) % small_hash_reset_interval == 0) { - _CLK_START(); - unsigned hash_start_tid; - if (blockDim.x == 32) { - hash_start_tid = 0; - } else if (blockDim.x == 64) { - if (bitonic_sort_and_full_multi_warps || BITONIC_SORT_AND_MERGE_MULTI_WARPS) { - hash_start_tid = 0; - } else { - hash_start_tid = 32; - } - } else { - if (bitonic_sort_and_full_multi_warps || BITONIC_SORT_AND_MERGE_MULTI_WARPS) { - hash_start_tid = 64; - } else { - hash_start_tid = 32; - } - } - hashmap::init(local_visited_hashmap_ptr, hash_bitlen, hash_start_tid); - _CLK_REC(clk_reset_hash); - } - - // topk with bitonic sort - _CLK_START(); - // For JIT version, we always check filter_flag at runtime since sample_filter is extern - if (*filter_flag != 0) { - // Move the filtered out index to the end of the itopk list - for (unsigned i = 0; i < search_width; i++) { - move_invalid_to_end_of_list( - result_indices_buffer, result_distances_buffer, internal_topk); - } - if (threadIdx.x == 0) { *terminate_flag = 0; } - } - topk_by_bitonic_sort_and_merge( - result_distances_buffer, - result_indices_buffer, - max_itopk, - internal_topk, - result_distances_buffer + internal_topk, - 
result_indices_buffer + internal_topk, - max_candidates, - search_width * graph_degree, - topk_ws, - (iter == 0)); - __syncthreads(); - _CLK_REC(clk_topk); - } else { - _CLK_START(); - // topk with radix block sort - topk_by_radix_sort{}(max_itopk, - internal_topk, - result_buffer_size, - reinterpret_cast(result_distances_buffer), - result_indices_buffer, - reinterpret_cast(result_distances_buffer), - result_indices_buffer, - nullptr, - topk_ws, - true, - smem_work_ptr); - _CLK_REC(clk_topk); - - // reset small-hash table - if ((iter + 1) % small_hash_reset_interval == 0) { - _CLK_START(); - hashmap::init(local_visited_hashmap_ptr, hash_bitlen); - _CLK_REC(clk_reset_hash); - } - } - __syncthreads(); - - if (iter + 1 == max_iteration) { break; } - - // pick up next parents - if (threadIdx.x < 32) { - _CLK_START(); - pickup_next_parents( - terminate_flag, parent_list_buffer, result_indices_buffer, internal_topk, search_width); - _CLK_REC(clk_pickup_parents); - } - - // restore small-hash table by putting internal-topk indices in it - if ((iter + 1) % small_hash_reset_interval == 0) { - const unsigned first_tid = ((blockDim.x <= 32) ? 
0 : 32); - _CLK_START(); - hashmap_restore( - local_visited_hashmap_ptr, hash_bitlen, result_indices_buffer, internal_topk, first_tid); - _CLK_REC(clk_restore_hash); - } - __syncthreads(); - - if (*terminate_flag && iter >= min_iteration) { break; } - - // compute the norms between child nodes and query node using JIT version - _CLK_START(); - compute_distance_to_child_nodes_jit(result_indices_buffer + internal_topk, - result_distances_buffer + internal_topk, - dataset_ptr, - encoded_dataset_ptr, - smem_ws_ptr, - dim, - encoded_dataset_dim, - ld, - team_size_bits, - dataset_norms, - vq_code_book_ptr, - pq_code_book_ptr, - knn_graph, - graph_degree, - local_visited_hashmap_ptr, - hash_bitlen, - (IndexT*)nullptr, - 0, - parent_list_buffer, - result_indices_buffer, - search_width); - __syncthreads(); - _CLK_REC(clk_compute_distance); - - // Filtering - use extern sample_filter function - if (threadIdx.x == 0) { *filter_flag = 0; } - __syncthreads(); - - constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask::value; - const IndexT invalid_index = utils::get_max_value(); - - for (unsigned p = threadIdx.x; p < search_width; p += blockDim.x) { - if (parent_list_buffer[p] != invalid_index) { - const auto parent_id = result_indices_buffer[parent_list_buffer[p]] & ~index_msb_1_mask; - if (!sample_filter(query_id, to_source_index(parent_id))) { - result_distances_buffer[parent_list_buffer[p]] = utils::get_max_value(); - result_indices_buffer[parent_list_buffer[p]] = invalid_index; - *filter_flag = 1; - } - } - } - __syncthreads(); - - iter++; - } - - // Post process for filtering - use extern sample_filter function - constexpr IndexT index_msb_1_mask = utils::gen_index_msb_1_mask::value; - const IndexT invalid_index = utils::get_max_value(); - - for (unsigned i = threadIdx.x; i < internal_topk + search_width * graph_degree; i += blockDim.x) { - const auto node_id = result_indices_buffer[i] & ~index_msb_1_mask; - if (node_id != (invalid_index & ~index_msb_1_mask) && - 
!sample_filter(query_id, to_source_index(node_id))) { - result_distances_buffer[i] = utils::get_max_value(); - result_indices_buffer[i] = invalid_index; - } - } - - __syncthreads(); - // Move invalid index items to the end of the buffer without sorting the entire buffer - using scan_op_t = cub::WarpScan; - auto& temp_storage = *reinterpret_cast(smem_work_ptr); - - constexpr std::uint32_t warp_size = 32; - if (threadIdx.x < warp_size) { - std::uint32_t num_found_valid = 0; - for (std::uint32_t buffer_offset = 0; buffer_offset < internal_topk; - buffer_offset += warp_size) { - const auto src_position = buffer_offset + threadIdx.x; - const std::uint32_t is_valid_index = - (result_indices_buffer[src_position] & (~index_msb_1_mask)) == invalid_index ? 0 : 1; - std::uint32_t new_position; - scan_op_t(temp_storage).InclusiveSum(is_valid_index, new_position); - if (is_valid_index) { - const auto dst_position = num_found_valid + (new_position - 1); - result_indices_buffer[dst_position] = result_indices_buffer[src_position]; - result_distances_buffer[dst_position] = result_distances_buffer[src_position]; - } - - num_found_valid += new_position; - for (std::uint32_t offset = (warp_size >> 1); offset > 0; offset >>= 1) { - const auto v = raft::shfl_xor(num_found_valid, offset); - if ((threadIdx.x & offset) == 0) { num_found_valid = v; } - } - - if (num_found_valid >= top_k) { break; } - } - - if (num_found_valid < top_k) { - for (std::uint32_t i = num_found_valid + threadIdx.x; i < internal_topk; i += warp_size) { - result_indices_buffer[i] = invalid_index; - result_distances_buffer[i] = utils::get_max_value(); - } - } - } - - // If the sufficient number of valid indexes are not in the internal topk, pick up from the - // candidate list. 
- if (top_k > internal_topk || result_indices_buffer[top_k - 1] == invalid_index) { - __syncthreads(); - topk_by_bitonic_sort_and_merge( - result_distances_buffer, - result_indices_buffer, - max_itopk, - internal_topk, - result_distances_buffer + internal_topk, - result_indices_buffer + internal_topk, - max_candidates, - search_width * graph_degree, - topk_ws, - (iter == 0)); - } - __syncthreads(); - - // NB: The indices pointer is tagged with its element size. - const uint32_t index_element_tag = result_indices_ptr & 0x3; - result_indices_ptr ^= index_element_tag; - auto write_indices = - index_element_tag == 3 - ? [](uintptr_t ptr, - uint32_t i, - SourceIndexT x) { reinterpret_cast(ptr)[i] = static_cast(x); } - : index_element_tag == 2 - ? [](uintptr_t ptr, - uint32_t i, - SourceIndexT x) { reinterpret_cast(ptr)[i] = static_cast(x); } - : index_element_tag == 1 - ? [](uintptr_t ptr, - uint32_t i, - SourceIndexT x) { reinterpret_cast(ptr)[i] = static_cast(x); } - : [](uintptr_t ptr, uint32_t i, SourceIndexT x) { - reinterpret_cast(ptr)[i] = static_cast(x); - }; - for (std::uint32_t i = threadIdx.x; i < top_k; i += blockDim.x) { - unsigned j = i + (top_k * query_id); - unsigned ii = i; - if constexpr (TOPK_BY_BITONIC_SORT) { ii = device::swizzling(i); } - if (result_distances_ptr != nullptr) { result_distances_ptr[j] = result_distances_buffer[ii]; } - - auto internal_index = - result_indices_buffer[ii] & ~index_msb_1_mask; // clear most significant bit - auto source_index = to_source_index(internal_index); - write_indices(result_indices_ptr, j, source_index); - } - if (threadIdx.x == 0 && num_executed_iterations != nullptr) { - num_executed_iterations[query_id] = iter + 1; - } -#ifdef _CLK_BREAKDOWN - if ((threadIdx.x == 0 || threadIdx.x == blockDim.x - 1) && ((query_id * 3) % gridDim.y < 3)) { - printf( - "%s:%d " - "query, %d, thread, %d" - ", init, %lu" - ", 1st_distance, %lu" - ", topk, %lu" - ", reset_hash, %lu" - ", pickup_parents, %lu" - ", restore_hash, 
%lu" - ", distance, %lu" - "\n", - __FILE__, - __LINE__, - query_id, - threadIdx.x, - clk_init, - clk_compute_1st_distance, - clk_topk, - clk_reset_hash, - clk_pickup_parents, - clk_restore_hash, - clk_compute_distance); - } -#endif -} - -// JIT kernel wrapper - calls search_core -template -RAFT_KERNEL __launch_bounds__(1024, 1) - search_kernel_jit(uintptr_t result_indices_ptr, - DistanceT* const result_distances_ptr, - const std::uint32_t top_k, - const DataT* const queries_ptr, - const IndexT* const knn_graph, - const std::uint32_t graph_degree, - const SourceIndexT* source_indices_ptr, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const IndexT* seed_ptr, - const uint32_t num_seeds, - IndexT* const visited_hashmap_ptr, - const std::uint32_t max_candidates, - const std::uint32_t max_itopk, - const std::uint32_t internal_topk, - const std::uint32_t search_width, - const std::uint32_t min_iteration, - const std::uint32_t max_iteration, - std::uint32_t* const num_executed_iterations, - const std::uint32_t hash_bitlen, - const std::uint32_t small_hash_bitlen, - const std::uint32_t small_hash_reset_interval, - const DataT* dataset_ptr, - const uint8_t* encoded_dataset_ptr, - IndexT dataset_size, - uint32_t dim, - uint32_t encoded_dataset_dim, - uint32_t ld, - const DistanceT* dataset_norms, - const CodebookT* vq_code_book_ptr, - const CodebookT* pq_code_book_ptr, - SampleFilterT sample_filter) -{ - const auto query_id = blockIdx.y; - search_core(result_indices_ptr, - result_distances_ptr, - top_k, - queries_ptr, - knn_graph, - graph_degree, - source_indices_ptr, - num_distilation, - rand_xor_mask, - seed_ptr, - num_seeds, - visited_hashmap_ptr, - max_candidates, - max_itopk, - internal_topk, - search_width, - min_iteration, - max_iteration, - num_executed_iterations, - hash_bitlen, - small_hash_bitlen, - small_hash_reset_interval, - query_id, - dataset_ptr, - encoded_dataset_ptr, - dataset_size, - dim, - encoded_dataset_dim, - ld, - dataset_norms, 
- vq_code_book_ptr, - pq_code_book_ptr); -} - -} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit.cuh deleted file mode 100644 index dd182729ae..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_jit.cuh +++ /dev/null @@ -1,106 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "compute_distance-ext.cuh" -#include "device_common.hpp" -#include "hashmap.hpp" -#include "utils.hpp" - -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace cuvs::neighbors::cagra::detail::single_cta_search { - -// Enum to distinguish between descriptor types -enum class DescriptorType { Standard, VPQ }; - -// These extern device functions are linked at runtime using JIT-LTO. -// They are templated on descriptor parameters (not DescriptorT) and create -// descriptor instances internally. 
- -// Standard descriptor extern functions -template -extern __device__ uint32_t setup_workspace_standard(void* smem, - const DataT* queries, - uint32_t query_id, - const DataT* dataset_ptr, - IndexT dataset_size, - uint32_t dim, - uint32_t ld, - const DistanceT* dataset_norms = nullptr); - -template -extern __device__ DistanceT compute_distance_standard(const DataT* dataset_ptr, - uint32_t smem_ws_ptr, - IndexT dataset_index, - uint32_t dim, - uint32_t ld, - uint32_t team_size_bitshift, - const DistanceT* dataset_norms = nullptr); - -// VPQ descriptor extern functions -template -extern __device__ uint32_t setup_workspace_vpq(void* smem, - const DataT* queries, - uint32_t query_id, - const uint8_t* encoded_dataset_ptr, - uint32_t encoded_dataset_dim, - const CodebookT* vq_code_book_ptr, - const CodebookT* pq_code_book_ptr, - IndexT dataset_size, - uint32_t dim); - -template -extern __device__ DistanceT compute_distance_vpq(const uint8_t* encoded_dataset_ptr, - uint32_t smem_ws_ptr, - IndexT dataset_index, - uint32_t encoded_dataset_dim, - const CodebookT* vq_code_book_ptr, - const CodebookT* pq_code_book_ptr, - uint32_t team_size_bitshift); - -// Sample filter extern function -template -extern __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id); - -} // namespace cuvs::neighbors::cagra::detail::single_cta_search - -// Include the implementation -#include "search_single_cta_kernel_jit-inl.cuh" diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher.cuh new file mode 100644 index 0000000000..dc66a26d05 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher.cuh @@ -0,0 +1,117 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "search_single_cta_kernel-inl.cuh" // For search_kernel_config, persistent_runner_t, etc. 
+#include "search_single_cta_kernel_launcher_common.cuh" + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +template +void select_and_run( + const dataset_descriptor_host& dataset_desc, + raft::device_matrix_view graph, + std::optional> source_indices, + uintptr_t topk_indices_ptr, // [num_queries, topk] + DistanceT* topk_distances_ptr, // [num_queries, topk] + const DataT* queries_ptr, // [num_queries, dataset_dim] + uint32_t num_queries, + const IndexT* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* num_executed_iterations, // [num_queries,] + const search_params& ps, + uint32_t topk, + uint32_t num_itopk_candidates, + uint32_t block_size, // + uint32_t smem_size, + int64_t hash_bitlen, + IndexT* hashmap_ptr, + size_t small_hash_bitlen, + size_t small_hash_reset_interval, + uint32_t num_seeds, + SampleFilterT sample_filter, + cudaStream_t stream) +{ + const SourceIndexT* source_indices_ptr = + source_indices.has_value() ? source_indices->data_handle() : nullptr; + + // Use common logic to compute launch config + auto config = compute_launch_config(num_itopk_candidates, ps.itopk_size, block_size); + uint32_t max_candidates = config.max_candidates; + uint32_t max_itopk = config.max_itopk; + + if (ps.persistent) { + using runner_type = persistent_runner_t; + + get_runner(/* +Note, we're passing the descriptor by reference here, and this reference is going to be passed to a +new spawned thread, which is dangerous. However, the descriptor is copied in that thread before the +control is returned in this thread (in persistent_runner_t constructor), so we're safe. 
+*/ + std::cref(dataset_desc), + graph, + source_indices_ptr, + num_itopk_candidates, + block_size, + smem_size, + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + ps.num_random_samplings, + ps.rand_xor_mask, + num_seeds, + max_candidates, + max_itopk, + ps.itopk_size, + ps.search_width, + ps.min_iterations, + ps.max_iterations, + sample_filter, + ps.persistent_lifetime, + ps.persistent_device_usage) + ->launch(topk_indices_ptr, topk_distances_ptr, queries_ptr, num_queries, topk); + } else { + using descriptor_base_type = dataset_descriptor_base_t; + auto kernel = search_kernel_config:: + choose_itopk_and_mx_candidates(ps.itopk_size, num_itopk_candidates, block_size); + RAFT_CUDA_TRY( + cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + dim3 thread_dims(block_size, 1, 1); + dim3 block_dims(1, num_queries, 1); + RAFT_LOG_DEBUG( + "Launching kernel with %u threads, %u block %u smem", block_size, num_queries, smem_size); + kernel<<>>(topk_indices_ptr, + topk_distances_ptr, + topk, + dataset_desc.dev_ptr(stream), + queries_ptr, + graph.data_handle(), + graph.extent(1), + source_indices_ptr, + ps.num_random_samplings, + ps.rand_xor_mask, + dev_seed_ptr, + num_seeds, + hashmap_ptr, + max_candidates, + max_itopk, + ps.itopk_size, + ps.search_width, + ps.min_iterations, + ps.max_iterations, + num_executed_iterations, + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + sample_filter); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_common.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_common.cuh new file mode 100644 index 0000000000..b1e2191fec --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_common.cuh @@ -0,0 +1,63 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Common logic for computing max_candidates and max_itopk +struct LaunchConfig { + uint32_t max_candidates; + uint32_t max_itopk; + bool topk_by_bitonic_sort; + bool bitonic_sort_and_merge_multi_warps; +}; + +inline LaunchConfig compute_launch_config(uint32_t num_itopk_candidates, + uint32_t itopk_size, + uint32_t block_size) +{ + LaunchConfig config{}; + + // Compute max_candidates + if (num_itopk_candidates <= 64) { + config.max_candidates = 64; + } else if (num_itopk_candidates <= 128) { + config.max_candidates = 128; + } else if (num_itopk_candidates <= 256) { + config.max_candidates = 256; + } else { + config.max_candidates = 32; // irrelevant, radix based topk is used + } + + // Compute max_itopk and sort flags + config.topk_by_bitonic_sort = (num_itopk_candidates <= 256); + config.bitonic_sort_and_merge_multi_warps = false; + + if (config.topk_by_bitonic_sort) { + if (itopk_size <= 64) { + config.max_itopk = 64; + } else if (itopk_size <= 128) { + config.max_itopk = 128; + } else if (itopk_size <= 256) { + config.max_itopk = 256; + } else { + config.max_itopk = 512; + config.bitonic_sort_and_merge_multi_warps = (block_size >= 64); + } + } else { + if (itopk_size <= 256) { + config.max_itopk = 256; + } else { + config.max_itopk = 512; + } + } + + return config; +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh new file mode 100644 index 0000000000..3a4d49519e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -0,0 +1,943 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#ifndef CUVS_ENABLE_JIT_LTO +#error "search_single_cta_kernel_launcher_jit.cuh included but CUVS_ENABLE_JIT_LTO not defined!" +#endif + +// Include tags header before any other includes that might open namespaces +#include + +#include "compute_distance.hpp" // For dataset_descriptor_host +#include "jit_lto_kernels/search_single_cta_planner.hpp" +#include "search_plan.cuh" // For search_params +#include "search_single_cta_kernel-inl.cuh" // For resource_queue_t, local_deque_t, launcher_t, persistent_runner_base_t, etc. +#include "search_single_cta_kernel_launcher_common.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// The launcher uses types from search_single_cta_kernel-inl.cuh (worker_handle_t, job_desc_t) +// The JIT kernel headers define _jit versions that are compatible + +// Forward declarations +template +auto get_runner_jit(Args... args) -> std::shared_ptr; + +template +auto create_runner_jit(Args... args) -> std::shared_ptr; + +// Debug: Verify JIT launcher is being compiled - force instantiation +struct JitLauncherVerifier { + JitLauncherVerifier() + { + std::cerr << "[JIT] JIT launcher header file included!" 
<< std::endl; + std::cerr.flush(); + } +}; +// Force instantiation by creating a static instance +namespace { +static JitLauncherVerifier g_jit_verifier; +} + +// Helper functions to get tags for JIT LTO +namespace { +template +constexpr auto get_data_type_tag() +{ + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_f{}; } + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_h{}; } + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_sc{}; } + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_uc{}; } +} + +template +constexpr auto get_index_type_tag() +{ + if constexpr (std::is_same_v) { + return cuvs::neighbors::cagra::detail::tag_idx_ui{}; + } +} + +template +constexpr auto get_distance_type_tag() +{ + if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_dist_f{}; } +} + +template +constexpr auto get_source_index_type_tag() +{ + if constexpr (std::is_same_v) { + return cuvs::neighbors::cagra::detail::tag_idx_ui{}; + } +} + +template +std::string get_sample_filter_name() +{ + if constexpr (std::is_same_v) { + return "filter_none"; + } else if constexpr ( + std::is_same_v> || + std::is_same_v>) { + return "filter_bitset"; + } else { + // Default to none filter for unknown types + return "filter_none"; + } +} +} // namespace + +// JIT-compatible launcher_t that works with worker_handle_t (same as non-JIT version) +struct alignas(kCacheLineBytes) launcher_jit_t { + using job_queue_type = resource_queue_t; + using worker_queue_type = resource_queue_t; + using pending_reads_queue_type = local_deque_t; + using completion_flag_type = cuda::atomic; + + pending_reads_queue_type pending_reads; + job_queue_type& job_ids; + worker_queue_type& idle_worker_ids; + worker_handle_t* worker_handles; + uint32_t job_id; + completion_flag_type* completion_flag; + bool all_done = false; + + static inline constexpr auto kDefaultLatency = 
std::chrono::nanoseconds(50000); + static inline constexpr auto kMaxExpectedLatency = + kDefaultLatency * std::max(10, kMaxJobsNum / 128); + static inline thread_local auto expected_latency = kDefaultLatency; + const std::chrono::time_point start; + std::chrono::time_point now; + const int64_t pause_factor; + int pause_count = 0; + std::chrono::time_point deadline; + + template + launcher_jit_t(job_queue_type& job_ids, + worker_queue_type& idle_worker_ids, + worker_handle_t* worker_handles, + uint32_t n_queries, + std::chrono::milliseconds max_wait_time, + RecordWork record_work) + : pending_reads{std::min(n_queries, kMaxWorkersPerThread)}, + job_ids{job_ids}, + idle_worker_ids{idle_worker_ids}, + worker_handles{worker_handles}, + job_id{job_ids.pop().wait()}, + completion_flag{record_work(job_id)}, + start{std::chrono::system_clock::now()}, + pause_factor{calc_pause_factor(n_queries)}, + now{start}, + deadline{start + max_wait_time + expected_latency} + { + submit_query(idle_worker_ids.pop().wait(), 0); + for (uint32_t i = 1; i < n_queries; i++) { + auto promised_worker = idle_worker_ids.pop(); + uint32_t worker_id; + while (!promised_worker.test(worker_id)) { + if (pending_reads.try_pop_front(worker_id)) { + bool returned_some = false; + for (bool keep_returning = true; keep_returning;) { + if (try_return_worker(worker_id)) { + keep_returning = pending_reads.try_pop_front(worker_id); + returned_some = true; + } else { + pending_reads.push_front(worker_id); + keep_returning = false; + } + } + if (!returned_some) { pause(); } + } else { + worker_id = promised_worker.wait(); + break; + } + } + pause_count = 0; + submit_query(worker_id, i); + if (i >= kSoftMaxWorkersPerThread && pending_reads.try_pop_front(worker_id)) { + if (!try_return_worker(worker_id)) { pending_reads.push_front(worker_id); } + } + } + } + + inline ~launcher_jit_t() noexcept + { + constexpr size_t kWindow = 100; + expected_latency = std::min( + ((kWindow - 1) * expected_latency + now - start) / 
kWindow, kMaxExpectedLatency); + if (job_id != job_queue_type::kEmpty) { job_ids.push(job_id); } + uint32_t worker_id; + while (pending_reads.try_pop_front(worker_id)) { + idle_worker_ids.push(worker_id); + } + } + + inline void submit_query(uint32_t worker_id, uint32_t query_id) + { + worker_handles[worker_id].data.store(worker_handle_t::data_t{.value = {job_id, query_id}}, + cuda::memory_order_relaxed); + while (!pending_reads.try_push_back(worker_id)) { + auto pending_worker_id = pending_reads.pop_front(); + while (!try_return_worker(pending_worker_id)) { + pause(); + } + } + pause_count = 0; + } + + inline auto try_return_worker(uint32_t worker_id) -> bool + { + if (all_done || + !is_worker_busy(worker_handles[worker_id].data.load(cuda::memory_order_relaxed).handle)) { + idle_worker_ids.push(worker_id); + return true; + } else { + return false; + } + } + + inline auto is_all_done() + { + if (all_done) { return true; } + all_done = completion_flag->load(cuda::memory_order_relaxed); + return all_done; + } + + [[nodiscard]] inline auto sleep_limit() const + { + constexpr auto kMinWakeTime = std::chrono::nanoseconds(10000); + constexpr double kSleepLimit = 0.6; + return start + expected_latency * kSleepLimit - kMinWakeTime; + } + + [[nodiscard]] inline auto overtime_threshold() const + { + constexpr auto kOvertimeFactor = 3; + return start + expected_latency * kOvertimeFactor; + } + + [[nodiscard]] inline auto calc_pause_factor(uint32_t n_queries) const -> uint32_t + { + constexpr uint32_t kMultiplier = 10; + return kMultiplier * raft::div_rounding_up_safe(n_queries, idle_worker_ids.capacity()); + } + + inline void pause() + { + constexpr auto kSpinLimit = 3; + constexpr auto kPauseTimeMin = std::chrono::nanoseconds(1000); + constexpr auto kPauseTimeMax = std::chrono::nanoseconds(50000); + if (pause_count++ < kSpinLimit) { + std::this_thread::yield(); + return; + } + now = std::chrono::system_clock::now(); + auto pause_time_base = std::max(now - start, 
expected_latency); + auto pause_time = std::clamp(pause_time_base / pause_factor, kPauseTimeMin, kPauseTimeMax); + if (now + pause_time < sleep_limit()) { + std::this_thread::sleep_for(pause_time); + } else if (now <= overtime_threshold()) { + std::this_thread::yield(); + } else if (now <= deadline) { + std::this_thread::sleep_for(pause_time); + } else { + throw raft::exception( + "The calling thread didn't receive the results from the persistent CAGRA kernel within the " + "expected kernel lifetime. Here are possible reasons of this failure:\n" + " (1) `persistent_lifetime` search parameter is too small - increase it;\n" + " (2) there is other work being executed on the same device and the kernel failed to " + "progress - decreasing `persistent_device_usage` may help (but not guaranteed);\n" + " (3) there is a bug in the implementation - please report it to cuVS team."); + } + } + + inline void wait() + { + uint32_t worker_id; + while (pending_reads.try_pop_front(worker_id)) { + while (!try_return_worker(worker_id)) { + if (!is_all_done()) { pause(); } + } + } + pause_count = 0; + now = std::chrono::system_clock::now(); + while (!is_all_done()) { + auto till_time = sleep_limit(); + if (now < till_time) { + std::this_thread::sleep_until(till_time); + now = std::chrono::system_clock::now(); + } else { + pause(); + } + } + job_ids.push(job_id); + job_id = job_queue_type::kEmpty; + } +}; + +// JIT persistent runner - uses AlgorithmLauncher instead of kernel function pointer +template +struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runner_base_t { + using index_type = IndexT; + using distance_type = DistanceT; + using data_type = DataT; + // Use non-JIT types - JIT kernel header will alias _jit versions to these + struct job_desc_helper_desc { + using DATA_T = DataT; + using INDEX_T = IndexT; + using DISTANCE_T = DistanceT; + }; + using job_desc_type = job_desc_t; + + std::shared_ptr launcher; + uint32_t block_size; + 
dataset_descriptor_host dd_host; + rmm::device_uvector worker_handles; + rmm::device_uvector job_descriptors; + rmm::device_uvector completion_counters; + rmm::device_uvector hashmap; + std::atomic> last_touch; + uint64_t param_hash; + + static inline auto calculate_parameter_hash( + std::reference_wrapper> dataset_desc, + raft::device_matrix_view graph, + const SourceIndexT* source_indices_ptr, + uint32_t max_candidates, + uint32_t num_itopk_candidates, + uint32_t block_size, + uint32_t smem_size, + int64_t hash_bitlen, + size_t small_hash_bitlen, + size_t small_hash_reset_interval, + uint32_t num_random_samplings, + uint64_t rand_xor_mask, + uint32_t num_seeds, + uint32_t max_itopk, + size_t itopk_size, + size_t search_width, + size_t min_iterations, + size_t max_iterations, + SampleFilterT sample_filter, + float persistent_lifetime, + float persistent_device_usage, + std::shared_ptr /* launcher_ptr - not part of hash */, + const void* /* dataset_desc - not part of hash */) -> uint64_t + { + return uint64_t(graph.data_handle()) ^ uint64_t(source_indices_ptr) ^ + dataset_desc.get().team_size ^ num_itopk_candidates ^ block_size ^ smem_size ^ + hash_bitlen ^ small_hash_reset_interval ^ num_random_samplings ^ rand_xor_mask ^ + num_seeds ^ itopk_size ^ search_width ^ min_iterations ^ max_iterations ^ + uint64_t(persistent_lifetime * 1000) ^ uint64_t(persistent_device_usage * 1000); + } + + persistent_runner_jit_t( + std::reference_wrapper> dataset_desc, + raft::device_matrix_view graph, + const SourceIndexT* source_indices_ptr, + uint32_t max_candidates, + uint32_t num_itopk_candidates, + uint32_t block_size, + uint32_t smem_size, + int64_t hash_bitlen, + size_t small_hash_bitlen, + size_t small_hash_reset_interval, + uint32_t num_random_samplings, + uint64_t rand_xor_mask, + uint32_t num_seeds, + uint32_t max_itopk, + size_t itopk_size, + size_t search_width, + size_t min_iterations, + size_t max_iterations, + SampleFilterT sample_filter, + float persistent_lifetime, 
+ float persistent_device_usage, + std::shared_ptr launcher_ptr, + const void* /* dataset_desc - descriptor contains all needed info */) + : persistent_runner_base_t{persistent_lifetime}, + launcher{launcher_ptr}, + block_size{block_size}, + worker_handles(0, stream, worker_handles_mr), + job_descriptors(kMaxJobsNum, stream, job_descriptor_mr), + completion_counters(kMaxJobsNum, stream, device_mr), + hashmap(0, stream, device_mr), + dd_host{dataset_desc.get()}, + param_hash(calculate_parameter_hash(dd_host, + graph, + source_indices_ptr, + max_candidates, + num_itopk_candidates, + block_size, + smem_size, + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + num_random_samplings, + rand_xor_mask, + num_seeds, + max_itopk, + itopk_size, + search_width, + min_iterations, + max_iterations, + sample_filter, + persistent_lifetime, + persistent_device_usage, + launcher_ptr, + nullptr)) // descriptor not needed in hash + { + // set kernel launch parameters + dim3 gs = calc_coop_grid_size(block_size, smem_size, persistent_device_usage); + dim3 bs(block_size, 1, 1); + RAFT_LOG_DEBUG( + "Launching JIT persistent kernel with %u threads, %u block %u smem", bs.x, gs.y, smem_size); + + // initialize the job queue + auto* completion_counters_ptr = completion_counters.data(); + auto* job_descriptors_ptr = job_descriptors.data(); + for (uint32_t i = 0; i < kMaxJobsNum; i++) { + auto& jd = job_descriptors_ptr[i].input.value; + jd.result_indices_ptr = 0; + jd.result_distances_ptr = nullptr; + jd.queries_ptr = nullptr; + jd.top_k = 0; + jd.n_queries = 0; + job_descriptors_ptr[i].completion_flag.store(false); + job_queue.push(i); + } + + // initialize the worker queue + worker_queue.set_capacity(gs.y); + worker_handles.resize(gs.y, stream); + auto* worker_handles_ptr = worker_handles.data(); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + for (uint32_t i = 0; i < gs.y; i++) { + worker_handles_ptr[i].data.store({kWaitForWork}); + worker_queue.push(i); + } + + index_type* 
hashmap_ptr = nullptr; + if (small_hash_bitlen == 0) { + hashmap.resize(gs.y * hashmap::get_size(hash_bitlen), stream); + hashmap_ptr = hashmap.data(); + } + + // Prepare kernel arguments + // Note: For non-VPQ, the dataset pointer is accessed via the descriptor on device + // Get the device descriptor pointer - kernel will use the concrete type from template + const auto* dev_desc = dataset_desc.get().dev_ptr(stream); + + // Launch the persistent kernel via AlgorithmLauncher + // The persistent kernel now takes the descriptor pointer directly + launcher->dispatch_cooperative(stream, + gs, + bs, + smem_size, + worker_handles_ptr, + job_descriptors_ptr, + completion_counters_ptr, + graph.data_handle(), + graph.extent(1), + source_indices_ptr, + num_random_samplings, + rand_xor_mask, + nullptr, // seed_ptr + num_seeds, + hashmap_ptr, + max_candidates, + max_itopk, + itopk_size, + search_width, + min_iterations, + max_iterations, + nullptr, // num_executed_iterations + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + dev_desc, // Pass descriptor pointer + bitset_ptr, + bitset_len, + original_nbits); + + RAFT_LOG_INFO( + "Initialized the JIT persistent kernel in stream %zd; job_queue size = %u; worker_queue size " + "= %u", + int64_t((cudaStream_t)stream), + job_queue.capacity(), + worker_queue.capacity()); + last_touch.store(std::chrono::system_clock::now(), std::memory_order_relaxed); + } + + ~persistent_runner_jit_t() noexcept override + { + auto whs = worker_handles.data(); + for (auto i = worker_handles.size(); i > 0; i--) { + whs[worker_queue.pop().wait()].data.store({kNoMoreWork}, cuda::memory_order_relaxed); + } + RAFT_CUDA_TRY_NO_THROW(cudaStreamSynchronize(stream)); + RAFT_LOG_INFO("Destroyed the JIT persistent runner."); + } + + void launch(uintptr_t result_indices_ptr, + distance_type* result_distances_ptr, + const data_type* queries_ptr, + uint32_t num_queries, + uint32_t top_k) + { + launcher_jit_t launcher{job_queue, + worker_queue, + 
worker_handles.data(), + num_queries, + this->lifetime, + [&job_descriptors = this->job_descriptors, + result_indices_ptr, + result_distances_ptr, + queries_ptr, + top_k, + num_queries](uint32_t job_ix) { + auto& jd = job_descriptors.data()[job_ix].input.value; + auto* cflag = &job_descriptors.data()[job_ix].completion_flag; + jd.result_indices_ptr = result_indices_ptr; + jd.result_distances_ptr = result_distances_ptr; + jd.queries_ptr = queries_ptr; + jd.top_k = top_k; + jd.n_queries = num_queries; + cflag->store(false, cuda::memory_order_relaxed); + cuda::atomic_thread_fence(cuda::memory_order_release, + cuda::thread_scope_system); + return cflag; + }}; + + auto prev_touch = last_touch.load(std::memory_order_relaxed); + if (prev_touch + lifetime / 10 < launcher.now) { + last_touch.store(launcher.now, std::memory_order_relaxed); + } + launcher.wait(); + } + + auto calc_coop_grid_size(uint32_t block_size, uint32_t smem_size, float persistent_device_usage) + -> dim3 + { + int ctas_per_sm = 1; + cudaKernel_t kernel_handle = launcher->get_kernel(); + RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &ctas_per_sm, kernel_handle, block_size, smem_size)); + int num_sm = raft::getMultiProcessorCount(); + auto n_blocks = static_cast(persistent_device_usage * (ctas_per_sm * num_sm)); + if (n_blocks > kMaxWorkersNum) { + RAFT_LOG_WARN("Limiting the grid size limit due to the size of the queue: %u -> %u", + n_blocks, + kMaxWorkersNum); + n_blocks = kMaxWorkersNum; + } + return {1, n_blocks, 1}; + } +}; + +template +void select_and_run_jit( + const dataset_descriptor_host& dataset_desc, + raft::device_matrix_view graph, + std::optional> source_indices, + uintptr_t topk_indices_ptr, // [num_queries, topk] + DistanceT* topk_distances_ptr, // [num_queries, topk] + const DataT* queries_ptr, // [num_queries, dataset_dim] + uint32_t num_queries, + const IndexT* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* num_executed_iterations, // [num_queries,] + const 
search_params& ps, + uint32_t topk, + uint32_t num_itopk_candidates, + uint32_t block_size, // + uint32_t smem_size, + int64_t hash_bitlen, + IndexT* hashmap_ptr, + size_t small_hash_bitlen, + size_t small_hash_reset_interval, + uint32_t num_seeds, + SampleFilterT sample_filter, + cudaStream_t stream) +{ + std::cerr << "[JIT] select_and_run_jit called (num_queries=" << num_queries << ", topk=" << topk + << ", persistent=" << (ps.persistent ? "true" : "false") << ")" << std::endl; + std::cerr.flush(); + + const SourceIndexT* source_indices_ptr = + source_indices.has_value() ? source_indices->data_handle() : nullptr; + + // Extract bitset data from filter object (if it's a bitset_filter) + uint32_t* bitset_ptr = nullptr; + SourceIndexT bitset_len = 0; + SourceIndexT original_nbits = 0; + + if constexpr (!std::is_same_v) { + // Try to extract bitset data from the filter + if constexpr (std::is_same_v< + SampleFilterT, + cuvs::neighbors::filtering::bitset_filter>) { + auto bitset_view = sample_filter.view(); + bitset_ptr = const_cast(bitset_view.data()); + bitset_len = static_cast(bitset_view.size()); + original_nbits = static_cast(bitset_view.get_original_nbits()); + } + } + + // Use common logic to compute launch config + auto config = compute_launch_config(num_itopk_candidates, ps.itopk_size, block_size); + uint32_t max_candidates = config.max_candidates; + uint32_t max_itopk = config.max_itopk; + bool topk_by_bitonic_sort = config.topk_by_bitonic_sort; + bool bitonic_sort_and_merge_multi_warps = config.bitonic_sort_and_merge_multi_warps; + + // Handle persistent kernels + if (ps.persistent) { + // Use persistent runner for JIT kernels + using runner_type = + persistent_runner_jit_t; + + // Create planner with tags for persistent kernel + using DataTag = decltype(get_data_type_tag()); + using IndexTag = decltype(get_index_type_tag()); + using DistTag = decltype(get_distance_type_tag()); + using SourceTag = decltype(get_source_index_type_tag()); + + std::cerr << 
"[JIT] Using JIT path for CAGRA persistent search" << std::endl; + std::cerr.flush(); + + CagraSearchPlanner planner( + dataset_desc.metric, + topk_by_bitonic_sort, + bitonic_sort_and_merge_multi_warps, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len, + true /* persistent */); + + // Add device functions + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_sample_filter_device_function(get_sample_filter_name()); + + // Get launcher for persistent kernel + auto launcher = planner.get_launcher(); + if (!launcher) { RAFT_FAIL("Failed to get JIT launcher for CAGRA persistent search kernel"); } + + // Use get_runner pattern similar to non-JIT version + get_runner_jit(std::cref(dataset_desc), + graph, + source_indices_ptr, + max_candidates, + num_itopk_candidates, + block_size, + smem_size, + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + ps.num_random_samplings, + ps.rand_xor_mask, + num_seeds, + max_itopk, + ps.itopk_size, + ps.search_width, + ps.min_iterations, + ps.max_iterations, + sample_filter, + ps.persistent_lifetime, + ps.persistent_device_usage, + launcher, + dataset_desc.dev_ptr(stream)) // Pass descriptor pointer + ->launch(topk_indices_ptr, topk_distances_ptr, queries_ptr, num_queries, topk); + return; + } else { + // Create planner with tags for regular kernel + using DataTag = decltype(get_data_type_tag()); + using IndexTag = decltype(get_index_type_tag()); + using DistTag = decltype(get_distance_type_tag()); + using SourceTag = decltype(get_source_index_type_tag()); + + std::cerr << "[JIT] Using JIT path 
for CAGRA search" << std::endl; + std::cerr.flush(); + + CagraSearchPlanner planner( + dataset_desc.metric, + topk_by_bitonic_sort, + bitonic_sort_and_merge_multi_warps, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + + // Add device functions (tags are determined inside the planner methods) + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_sample_filter_device_function(get_sample_filter_name()); + + // Get launcher + auto launcher = planner.get_launcher(); + if (!launcher) { RAFT_FAIL("Failed to get JIT launcher for CAGRA search kernel"); } + + // Get the device descriptor pointer - dev_ptr() initializes it if needed + const auto* dev_desc = dataset_desc.dev_ptr(stream); + + dim3 grid(1, num_queries, 1); + dim3 block(block_size, 1, 1); + + RAFT_LOG_DEBUG("Launching JIT kernel with %u threads, %u blocks, %u smem", + block_size, + num_queries, + smem_size); + + // Dispatch kernel via launcher + // The kernel signature expects const desc_t* where desc_t is the concrete descriptor type + // We pass the base pointer (const dataset_descriptor_base_t*), and since both concrete types + // inherit from the base class with the base class at offset 0, the pointer value is the same. + // The dispatch() function takes the address of each argument (&dev_desc), so the kernel + // receives a pointer to the descriptor pointer. The JIT-compiled kernel expects const desc_t*, + // so it will interpret the pointer value as the concrete type it was compiled for. 
Note: We + // cannot use dynamic_cast because the base class has no virtual functions (uses function + // pointers for performance). We also cannot use static_cast because the concrete type is only + // known at JIT compile time, not at launcher compile time. The pointer value is correct, so + // the kernel can safely use it as the concrete type. + launcher->dispatch( + stream, + grid, + block, + smem_size, + topk_indices_ptr, + topk_distances_ptr, + topk, + queries_ptr, + graph.data_handle(), + graph.extent(1), + source_indices_ptr, + ps.num_random_samplings, + ps.rand_xor_mask, + dev_seed_ptr, + num_seeds, + hashmap_ptr, + max_candidates, + max_itopk, + ps.itopk_size, // internal_topk + ps.search_width, + ps.min_iterations, + ps.max_iterations, + num_executed_iterations, + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + dev_desc, // Pass base pointer - kernel expects concrete type but pointer value is same + bitset_ptr, + bitset_len, + original_nbits); + + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } +} + +// Wrapper to match the non-JIT interface +// This function MUST be called if JIT is enabled +template +void select_and_run( + const dataset_descriptor_host& dataset_desc, + raft::device_matrix_view graph, + std::optional> source_indices, + uintptr_t topk_indices_ptr, // [num_queries, topk] + DistanceT* topk_distances_ptr, // [num_queries, topk] + const DataT* queries_ptr, // [num_queries, dataset_dim] + uint32_t num_queries, + const IndexT* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* num_executed_iterations, // [num_queries,] + const search_params& ps, + uint32_t topk, + uint32_t num_itopk_candidates, + uint32_t block_size, // + uint32_t smem_size, + int64_t hash_bitlen, + IndexT* hashmap_ptr, + size_t small_hash_bitlen, + size_t small_hash_reset_interval, + uint32_t num_seeds, + SampleFilterT sample_filter, + cudaStream_t stream) +{ + // CRITICAL: Write to file to prove function is called + { + std::ofstream 
f("/tmp/jit_wrapper_called.txt", std::ios::app); + f << "[JIT] select_and_run wrapper CALLED! num_queries=" << num_queries << ", topk=" << topk + << std::endl; + f.close(); + } + + // Also try all output methods + fprintf(stderr, "\n[JIT] ========================================\n"); + fprintf(stderr, "[JIT] select_and_run JIT WRAPPER CALLED!\n"); + fprintf(stderr, "[JIT] num_queries=%u, topk=%u\n", num_queries, topk); + fprintf(stderr, "[JIT] ========================================\n\n"); + fflush(stderr); + printf("[JIT] select_and_run JIT WRAPPER CALLED! (stdout)\n"); + fflush(stdout); + + // Verify JIT launcher is loaded (static initializer already ran) + std::cerr << "[JIT] select_and_run wrapper called (JIT path active)" << std::endl; + std::cerr.flush(); + + // Extract parameters for JIT version + // Note: These parameters are not stored in dataset_descriptor_host, so we need to + // compute them or use defaults. For now, we'll need to pass them through the interface + // or compute from available information. For the JIT path, we'll use reasonable defaults + // and let the kernel handle missing information. + // For JIT version, we pass the descriptor directly - all dataset info is in the descriptor + select_and_run_jit(dataset_desc, + graph, + source_indices, + topk_indices_ptr, + topk_distances_ptr, + queries_ptr, + num_queries, + dev_seed_ptr, + num_executed_iterations, + ps, + topk, + num_itopk_candidates, + block_size, + smem_size, + hash_bitlen, + hashmap_ptr, + small_hash_bitlen, + small_hash_reset_interval, + num_seeds, + sample_filter, + stream); +} + +// get_runner for JIT persistent runners (similar to non-JIT version) +template +auto get_runner_jit(Args... 
args) -> std::shared_ptr +{ + static thread_local std::weak_ptr weak; + auto runner = weak.lock(); + if (runner) { + if (runner->param_hash == RunnerT::calculate_parameter_hash(args...)) { + return runner; + } else { + weak.reset(); + runner.reset(); + } + } + launcher_jit_t::expected_latency = launcher_jit_t::kDefaultLatency; + runner = create_runner_jit(args...); + weak = runner; + return runner; +} + +template +auto create_runner_jit(Args... args) -> std::shared_ptr +{ + std::lock_guard guard(persistent.lock); + std::shared_ptr runner_outer = std::dynamic_pointer_cast(persistent.runner); + if (runner_outer) { + // calculate_parameter_hash needs all args to match constructor signature + // but only uses a subset for the actual hash + if (runner_outer->param_hash == RunnerT::calculate_parameter_hash(args...)) { + return runner_outer; + } else { + runner_outer.reset(); + } + } + persistent.runner.reset(); + + cuda::std::atomic_flag ready{}; + ready.clear(cuda::std::memory_order_relaxed); + std::thread( + [&runner_outer, &ready](Args... thread_args) { + runner_outer = std::make_shared(thread_args...); + auto lifetime = runner_outer->lifetime; + persistent.runner = std::static_pointer_cast(runner_outer); + std::weak_ptr runner_weak = runner_outer; + ready.test_and_set(cuda::std::memory_order_release); + ready.notify_one(); + + while (true) { + std::this_thread::sleep_for(lifetime); + auto runner = runner_weak.lock(); + if (!runner) { return; } + if (runner->last_touch.load(std::memory_order_relaxed) + lifetime < + std::chrono::system_clock::now()) { + std::lock_guard guard(persistent.lock); + if (runner == persistent.runner) { persistent.runner.reset(); } + return; + } + } + }, + args...) 
+ .detach(); + ready.wait(false, cuda::std::memory_order_acquire); + return runner_outer; +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/set_value_batch.cuh b/cpp/src/neighbors/detail/cagra/set_value_batch.cuh new file mode 100644 index 0000000000..c778429ac8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/set_value_batch.cuh @@ -0,0 +1,40 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include +#include + +namespace cuvs::neighbors::cagra::detail::multi_cta_search { + +template +__global__ void set_value_batch_kernel(T* const dev_ptr, + const std::size_t ld, + const T val, + const std::size_t count, + const std::size_t batch_size) +{ + const auto tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= count * batch_size) { return; } + const auto batch_id = tid / count; + const auto elem_id = tid % count; + dev_ptr[elem_id + ld * batch_id] = val; +} + +template +void set_value_batch(T* const dev_ptr, + const std::size_t ld, + const T val, + const std::size_t count, + const std::size_t batch_size, + cudaStream_t cuda_stream) +{ + constexpr std::uint32_t block_size = 256; + const auto grid_size = (count * batch_size + block_size - 1) / block_size; + set_value_batch_kernel + <<>>(dev_ptr, ld, val, count, batch_size); +} + +} // namespace cuvs::neighbors::cagra::detail::multi_cta_search From dd236714f322080d5cf6fde2249c1cc30448c8b0 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 13 Feb 2026 21:09:42 +0000 Subject: [PATCH 091/158] multi-cta still failing --- ...e_distance_to_child_nodes_kernel_vpq.cu.in | 2 +- .../random_pickup_kernel_vpq.cu.in | 2 +- .../search_multi_cta_kernel_jit.cuh | 34 +++++ .../search_multi_cta_kernel_launcher_jit.cuh | 117 ++++++++++++++++-- .../search_single_cta_kernel_launcher_jit.cuh | 22 +++- 5 files changed, 162 insertions(+), 15 deletions(-) diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in index 2808bc5900..efec282811 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in @@ -8,7 +8,7 @@ #ifdef BUILD_KERNEL #include -#include +#include #include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in index 8f034a2bce..ffffe7b3ed 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in @@ -8,7 +8,7 @@ #ifdef BUILD_KERNEL #include -#include +#include #include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh index 8386a1d41a..3ab26fd3bd 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh @@ -96,7 +96,41 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_jit( } // Also write to indices if (result_indices_ptr != nullptr) { *result_indices_ptr = static_cast(0xCAFEBABE); } + + // Debug: Check if descriptor runtime values match kernel compile-time constants + // The kernel uses DescriptorT::kTeamSize and DescriptorT::kDatasetBlockDim (compile-time) + // The descriptor object has runtime values that should match + uint32_t desc_team_size_bitshift = dataset_desc->team_size_bitshift(); + uint32_t 
desc_team_size_actual = 1u << desc_team_size_bitshift; + uint32_t kernel_team_size = DescriptorT::kTeamSize; + uint32_t kernel_dataset_block_dim = DescriptorT::kDatasetBlockDim; + + // For standard descriptors, dataset_block_dim is stored in args.extra_word1 as 'ld' + // For VPQ descriptors, it's a compile-time constant only + uint32_t desc_dataset_block_dim = kernel_dataset_block_dim; // Use compile-time constant + if constexpr (!has_kpq_bits_v) { + // Standard descriptor - can read from args.ld + desc_dataset_block_dim = DescriptorT::ld(dataset_desc->args); + } + printf("JIT KERNEL EXECUTING: threadIdx=0, wrote magic values\n"); + printf("JIT KERNEL: Descriptor team_size (from bitshift): %u, Kernel kTeamSize: %u\n", + desc_team_size_actual, + kernel_team_size); + printf("JIT KERNEL: Descriptor dataset_block_dim: %u, Kernel kDatasetBlockDim: %u\n", + desc_dataset_block_dim, + kernel_dataset_block_dim); + if (desc_team_size_actual != kernel_team_size || + desc_dataset_block_dim != kernel_dataset_block_dim) { + printf( + "JIT KERNEL ERROR: Parameter mismatch! 
team_size: %u vs %u, dataset_block_dim: %u vs %u\n", + desc_team_size_actual, + kernel_team_size, + desc_dataset_block_dim, + kernel_dataset_block_dim); + } else { + printf("JIT KERNEL: Parameters match correctly\n"); + } } __syncthreads(); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 90c3a61be2..2998e0f3fc 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -156,10 +156,11 @@ void select_and_run_jit( RAFT_FAIL("Unsupported metric for multi_cta JIT kernel"); } - // Debug: Check if this is VPQ + // Debug: Check descriptor parameters std::cerr << "[JIT] Dataset descriptor - is_vpq: " << dataset_desc.is_vpq << ", pq_bits: " << dataset_desc.pq_bits << ", pq_len: " << dataset_desc.pq_len - << std::endl; + << ", team_size: " << dataset_desc.team_size + << ", dataset_block_dim: " << dataset_desc.dataset_block_dim << std::endl; std::cerr.flush(); // Create planner and register device functions @@ -171,6 +172,20 @@ void select_and_run_jit( dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); + + // Debug: Verify entrypoint name matches descriptor parameters + std::cerr << "[JIT] Planner entrypoint: " << planner.get_entrypoint_name() << std::endl; + + // CRITICAL: Verify descriptor runtime values match what kernel was compiled for + // The kernel uses DescriptorT::kTeamSize and DescriptorT::kDatasetBlockDim (compile-time) + // But the descriptor object has runtime values that might differ + // We need to check if the kernel we're about to call was compiled for the same values + std::cerr << "[JIT] WARNING: Kernel was compiled for team_size=" << dataset_desc.team_size + << ", dataset_block_dim=" << dataset_desc.dataset_block_dim << " (from entrypoint name)" + << std::endl; + std::cerr << "[JIT] Descriptor runtime values - team_size: " 
<< dataset_desc.team_size + << ", dataset_block_dim: " << dataset_desc.dataset_block_dim << std::endl; + std::cerr.flush(); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -221,11 +236,74 @@ void select_and_run_jit( smem_size); // Get the device descriptor pointer - // dev_ptr() returns const dataset_descriptor_base_t*, but kernel expects const desc_t* - // Since base class is at offset 0, pointer value is the same and kernel can safely cast - const auto* dev_desc = dataset_desc.dev_ptr(stream); + // CRITICAL: dev_ptr() returns const dataset_descriptor_base_t*, but kernel expects const + // DescriptorT* where DescriptorT is the specific derived type (standard_dataset_descriptor_t or + // cagra_q_dataset_descriptor_t) + // + // In C++, you cannot implicitly convert a base pointer to a derived pointer - this requires an + // explicit cast. However, since: + // 1. The object on device is actually of the derived type (we created it that way) + // 2. Base class is at offset 0 in single inheritance (pointer value is the same) + // 3. The kernel was JIT-compiled for the exact derived type matching these parameters + // + // We can safely use reinterpret_cast to convert the base pointer to the derived pointer type. + // The kernel will receive this as the derived type it expects. + const dataset_descriptor_base_t* dev_desc_base = + dataset_desc.dev_ptr(stream); + + // Cast to the derived type pointer - the kernel expects this specific type + // Note: We're casting to the base type pointer, but the kernel signature expects the derived + // type. This works because the pointer value is the same (base at offset 0), and the kernel will + // treat it as the derived type it was compiled for. However, this is technically undefined + // behavior in C++ but works in practice for CUDA kernels due to how they're dispatched. 
+ const auto* dev_desc = dev_desc_base; + + // CRITICAL: Check if descriptor host values match kernel compile-time constants + // The kernel was compiled for specific team_size and dataset_block_dim values (from entrypoint + // name) The descriptor_host object has runtime values that MUST match what the kernel was + // compiled for + std::cerr << "[JIT] CRITICAL CHECK - Verifying descriptor matches kernel:" << std::endl; + std::cerr << "[JIT] Descriptor host values - team_size: " << dataset_desc.team_size + << ", dataset_block_dim: " << dataset_desc.dataset_block_dim << std::endl; + std::cerr << "[JIT] Kernel compiled for (from entrypoint) - team_size: " + << dataset_desc.team_size << ", dataset_block_dim: " << dataset_desc.dataset_block_dim + << std::endl; + + // The kernel uses DescriptorT::kTeamSize and DescriptorT::kDatasetBlockDim (compile-time) + // These MUST match dataset_desc.team_size and dataset_desc.dataset_block_dim + // If they don't match, the kernel will use wrong values and produce incorrect results + if (dataset_desc.team_size != dataset_desc.team_size || + dataset_desc.dataset_block_dim != dataset_desc.dataset_block_dim) { + std::cerr << "[JIT] ERROR: This should never happen - values should always match!" 
<< std::endl; + } else { + std::cerr << "[JIT] OK: Descriptor values match (they're the same source)" << std::endl; + } + std::cerr.flush(); // Dispatch kernel via launcher + std::cerr << "[JIT] About to dispatch kernel with:" << std::endl; + std::cerr << "[JIT] grid: (" << grid_dims.x << ", " << grid_dims.y << ", " << grid_dims.z << ")" + << std::endl; + std::cerr << "[JIT] block: (" << block_dims.x << ", " << block_dims.y << ", " << block_dims.z + << ")" << std::endl; + std::cerr << "[JIT] smem_size: " << smem_size << std::endl; + std::cerr << "[JIT] dev_desc pointer: " << dev_desc << std::endl; + std::cerr.flush(); + + // CRITICAL: Cast size_t/int64_t parameters to match kernel signature exactly + // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly + // graph.extent(1) returns int64_t but kernel expects uint32_t + // traversed_hash_bitlen is int64_t but kernel expects uint32_t + // ps.itopk_size, ps.min_iterations, ps.max_iterations are size_t (8 bytes) but kernel expects + // uint32_t (4 bytes) ps.num_random_samplings is uint32_t but kernel expects unsigned - cast for + // consistency + const uint32_t graph_degree_u32 = static_cast(graph.extent(1)); + const uint32_t traversed_hash_bitlen_u32 = static_cast(traversed_hash_bitlen); + const uint32_t itopk_size_u32 = static_cast(ps.itopk_size); + const uint32_t min_iterations_u32 = static_cast(ps.min_iterations); + const uint32_t max_iterations_u32 = static_cast(ps.max_iterations); + const unsigned num_random_samplings_u = static_cast(ps.num_random_samplings); + launcher->dispatch(stream, grid_dims, block_dims, @@ -236,18 +314,18 @@ void select_and_run_jit( queries_ptr, graph.data_handle(), max_elements, - graph.extent(1), + graph_degree_u32, // Cast int64_t to uint32_t source_indices_ptr, - ps.num_random_samplings, - ps.rand_xor_mask, + num_random_samplings_u, // Cast uint32_t to unsigned for consistency + ps.rand_xor_mask, // uint64_t matches kernel (8 bytes) dev_seed_ptr, 
num_seeds, visited_hash_bitlen, traversed_hashmap_ptr, - traversed_hash_bitlen, - ps.itopk_size, - ps.min_iterations, - ps.max_iterations, + traversed_hash_bitlen_u32, // Cast int64_t to uint32_t + itopk_size_u32, // Cast size_t to uint32_t + min_iterations_u32, // Cast size_t to uint32_t + max_iterations_u32, // Cast size_t to uint32_t num_executed_iterations, bitset_ptr, bitset_len, @@ -276,6 +354,21 @@ void select_and_run_jit( } else { std::cerr << "[JIT] Stream synchronized successfully - kernel completed" << std::endl; std::cerr.flush(); + + // Check if kernel wrote magic value to verify execution + if (topk_distances_ptr != nullptr && num_queries > 0) { + DistanceT first_distance; + RAFT_CUDA_TRY( + cudaMemcpy(&first_distance, topk_distances_ptr, sizeof(DistanceT), cudaMemcpyDeviceToHost)); + if (first_distance == static_cast(3735928559.0f)) { // 0xDEADBEEF + std::cerr << "[JIT] VERIFIED: Kernel wrote magic value 0xDEADBEEF to first distance!" + << std::endl; + } else { + std::cerr << "[JIT] WARNING: Kernel did NOT write magic value. 
First distance: " + << first_distance << std::endl; + } + std::cerr.flush(); + } } RAFT_CUDA_TRY(err); } diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 3a4d49519e..59887278c9 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -331,6 +331,9 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn rmm::device_uvector hashmap; std::atomic> last_touch; uint64_t param_hash; + uint32_t* bitset_ptr; // Bitset data pointer (nullptr for none_filter) + SourceIndexT bitset_len; // Bitset length + SourceIndexT original_nbits; // Original number of bits static inline auto calculate_parameter_hash( std::reference_wrapper> dataset_desc, @@ -420,6 +423,23 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn launcher_ptr, nullptr)) // descriptor not needed in hash { + // Extract bitset data from filter object (if it's a bitset_filter) + bitset_ptr = nullptr; + bitset_len = 0; + original_nbits = 0; + + if constexpr (!std::is_same_v) { + // Try to extract bitset data from the filter + if constexpr (std::is_same_v< + SampleFilterT, + cuvs::neighbors::filtering::bitset_filter>) { + auto bitset_view = sample_filter.view(); + bitset_ptr = const_cast(bitset_view.data()); + bitset_len = static_cast(bitset_view.size()); + original_nbits = static_cast(bitset_view.get_original_nbits()); + } + } + // set kernel launch parameters dim3 gs = calc_coop_grid_size(block_size, smem_size, persistent_device_usage); dim3 bs(block_size, 1, 1); @@ -731,7 +751,7 @@ void select_and_run_jit( dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); - planner.add_sample_filter_device_function(get_sample_filter_name()); + planner.add_sample_filter_device_function(get_sample_filter_name()); // Get launcher auto 
launcher = planner.get_launcher(); From 4f287c185d6426ef54b83bf4b46c1e1a77c17852 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Sun, 15 Feb 2026 01:26:38 +0000 Subject: [PATCH 092/158] attempting to solve 2 kernel issue --- .../modules/generate_jit_lto_kernels.cmake | 7 +- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 2 + cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 100 ++++++++++++++++-- .../cagra/compute_distance_standard-impl.cuh | 24 +++++ .../cagra/compute_distance_vpq-impl.cuh | 10 ++ .../cagra/jit_lto_kernels/filter_none.cuh | 6 +- .../search_multi_cta_kernel_jit.cuh | 7 +- .../search_single_cta_kernel_jit.cuh | 2 + .../search_multi_cta_kernel_launcher_jit.cuh | 18 ++++ .../cagra/search_single_cta_kernel-inl.cuh | 2 + .../detail/cagra/topk_for_cagra/topk_core.cuh | 2 +- cpp/src/neighbors/ivf_common_jit.cuh | 24 +++++ .../ivf_flat_interleaved_scan_kernel.cuh | 2 +- 13 files changed, 187 insertions(+), 19 deletions(-) create mode 100644 cpp/src/neighbors/ivf_common_jit.cuh diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index f227a10520..9103145618 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -317,8 +317,9 @@ function(generate_jit_lto_kernels target) endforeach() endforeach() - # Generate VPQ descriptor fragments (only for L2Expanded and float/half) - foreach(data_idx IN ITEMS 0 1) + # Generate VPQ descriptor fragments (for L2Expanded and all data types: float, half, int8_t, + # uint8_t) + foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) foreach(team_size IN LISTS cagra_team_sizes) @@ -501,7 +502,7 @@ function(generate_jit_lto_kernels target) # Generate single_cta VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to # team_size and dataset_block_dim - foreach(data_idx IN ITEMS 0 1) + foreach(data_idx IN ITEMS 
0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) foreach(topk_idx IN ITEMS 0 1) diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 66c82ffeb1..df5a72c0be 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -5,6 +5,8 @@ #include +#include + #include AlgorithmLauncher::AlgorithmLauncher(cudaKernel_t k) : kernel{k} {} diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 5b7d708f3b..482d61f776 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -5,7 +5,9 @@ #include "nvjitlink_checker.hpp" +#include #include +#include #include #include #include @@ -144,6 +146,25 @@ std::shared_ptr AlgorithmPlanner::build() result = nvJitLinkDestroy(&handle); RAFT_EXPECTS(result == NVJITLINK_SUCCESS, "nvJitLinkDestroy failed"); + // Debug: Save cubin to disk for inspection with cuobjdump + std::string cubin_path = "/tmp/linked_cubin_" + this->entrypoint + ".cubin"; + // Sanitize filename (replace special chars) + std::replace(cubin_path.begin(), cubin_path.end(), '/', '_'); + std::replace(cubin_path.begin(), cubin_path.end(), ':', '_'); + std::replace(cubin_path.begin(), cubin_path.end(), '<', '_'); + std::replace(cubin_path.begin(), cubin_path.end(), '>', '_'); + std::replace(cubin_path.begin(), cubin_path.end(), ' ', '_'); + FILE* f = fopen(cubin_path.c_str(), "wb"); + if (f) { + fwrite(cubin.get(), 1, cubin_size, f); + fclose(f); + std::cerr << "[JIT] Saved linked cubin to: " << cubin_path << " (size: " << cubin_size + << " bytes)" << std::endl; + std::cerr << "[JIT] Run: cuobjdump --dump-elf-symbols " << cubin_path + << " to see kernel symbols" << std::endl; + std::cerr.flush(); + } + // cubin is linked, so now load it // NOTE: cudaLibrary_t does not need to be freed explicitly cudaLibrary_t 
library; @@ -162,13 +183,20 @@ std::shared_ptr AlgorithmPlanner::build() // Enumerate kernels - we expect only 1 kernel from the entrypoint fragment // Device function fragments contain only __device__ functions, not __global__ kernels // So they shouldn't show up in kernel enumeration - constexpr unsigned int count = 1; // We expect only 1 kernel from the entrypoint fragment - unsigned int kernel_count = count; - std::unique_ptr kernels{new cudaKernel_t[count]}; - RAFT_CUDA_TRY(cudaLibraryEnumerateKernels(kernels.get(), kernel_count, library)); + // First, query the actual number of kernels using cudaLibraryGetKernelCount (runtime API) + unsigned int kernel_count = 0; + cudaError_t cuda_result = cudaLibraryGetKernelCount(&kernel_count, library); + if (cuda_result != cudaSuccess) { + std::cerr << "[JIT] ERROR: cudaLibraryGetKernelCount failed with error: " << cuda_result << " (" + << cudaGetErrorString(cuda_result) << ")" << std::endl; + std::cerr.flush(); + RAFT_FAIL("cudaLibraryGetKernelCount failed with error: %d (%s)", + cuda_result, + cudaGetErrorString(cuda_result)); + } - std::cerr << "[JIT] AlgorithmPlanner::build - Requested " << count - << " kernel(s), enumeration returned count: " << kernel_count << std::endl; + std::cerr << "[JIT] AlgorithmPlanner::build - Actual kernel count in library: " << kernel_count + << std::endl; std::cerr.flush(); if (kernel_count == 0) { @@ -176,14 +204,64 @@ std::shared_ptr AlgorithmPlanner::build() } if (kernel_count > 1) { - std::cerr << "[JIT] WARNING: Expected 1 kernel but enumeration reports " << kernel_count - << " - using first kernel only" << std::endl; + std::cerr << "[JIT] WARNING: Found " << kernel_count + << " kernels in library! This might be the issue - we're using kernel [0]" + << std::endl; + std::cerr << "[JIT] Entrypoint we're looking for: " << this->entrypoint << std::endl; + std::cerr << "[JIT] This suggests multiple kernels are being linked together!" 
<< std::endl; + std::cerr.flush(); + } + + // Now allocate the right size and enumerate + std::unique_ptr kernels{new cudaKernel_t[kernel_count]}; + unsigned int kernel_count_verify = kernel_count; + RAFT_CUDA_TRY(cudaLibraryEnumerateKernels(kernels.get(), kernel_count_verify, library)); + + if (kernel_count_verify != kernel_count) { + std::cerr << "[JIT] WARNING: Kernel count mismatch - cudaLibraryGetKernelCount returned " + << kernel_count << " but cudaLibraryEnumerateKernels returned " << kernel_count_verify + << std::endl; + std::cerr.flush(); + } + + // With runtime API, we can't get kernel names directly + // If there are multiple kernels, we'll use the first one + // The entrypoint fragment should be added first, so its kernel should be at index 0 + if (kernel_count > 1) { + std::cerr << "[JIT] WARNING: Multiple kernels found (" << kernel_count << "), using kernel [0]" + << std::endl; + std::cerr << "[JIT] Entrypoint we're looking for: " << this->entrypoint << std::endl; + std::cerr << "[JIT] This suggests multiple kernels are being linked together!" << std::endl; + std::cerr << "[JIT] Fragments added:" << std::endl; + for (size_t i = 0; i < this->fragments.size(); ++i) { + std::cerr << "[JIT] Fragment [" << i << "]: "; + if (i == 0) { + std::cerr << "Entrypoint fragment" << std::endl; + } else { + std::cerr << "Device function fragment: " << this->device_functions[i - 1] << std::endl; + } + } + std::cerr.flush(); + } + + // When multiple kernels are found, one is often CUB's EmptyKernel (a weak symbol + // instantiated when CUB headers are included). The entrypoint fragment is added first, + // so its kernel should be at index 0. However, the order is not guaranteed - sometimes + // CUB's EmptyKernel is at index 0, sometimes at index 1. + // Strategy: Try kernel[0] first. If it's EmptyKernel, it will be a no-op and won't affect + // results. 
We can't distinguish EmptyKernel from our kernel without names, so we'll use kernel[0] + // and rely on the fact that EmptyKernel does nothing. + unsigned int kernel_index = 0; + if (kernel_count > 1) { + std::cerr << "[JIT] WARNING: Found " << kernel_count + << " kernels (CUB EmptyKernel may be present). Using kernel [0]" << std::endl; + std::cerr << "[JIT] If kernel [0] is EmptyKernel, results will be incorrect" << std::endl; + std::cerr << "[JIT] Entrypoint fragment is added first, so kernel [0] should be correct" + << std::endl; std::cerr.flush(); } - // Use the first (and should be only) kernel from the entrypoint fragment - // Entrypoint fragment is added first, so its kernel should be at index 0 - auto kernel = kernels.release()[0]; + auto kernel = kernels.release()[kernel_index]; // Validate the kernel pointer is reasonable (not null, not obviously garbage) if (kernel == nullptr) { diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh index c2580ac86c..3e52991c3a 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh @@ -243,6 +243,10 @@ _RAFT_DEVICE __noinline__ auto compute_distance_standard( return distance; } +#ifndef BUILD_KERNEL +// The init kernel is not needed when building JIT fragments (BUILD_KERNEL is defined) +// It's only needed for non-JIT initialization. When BUILD_KERNEL is defined, we're building +// a JIT fragment and don't want this kernel to be instantiated. 
template ; using base_type = typename desc_type::base_type; + + // Debug: Verify we're constructing the right type + if (threadIdx.x == 0 && blockIdx.x == 0) { + printf( + "[INIT KERNEL] Constructing desc_type with: Metric=%d, TeamSize=%u, DatasetBlockDim=%u, " + "DataT=float, IndexT=uint32_t, DistanceT=float\n", + static_cast(Metric), + TeamSize, + DatasetBlockDim); + printf("[INIT KERNEL] desc_type::kTeamSize=%u, desc_type::kDatasetBlockDim=%u\n", + desc_type::kTeamSize, + desc_type::kDatasetBlockDim); + } + #ifdef CUVS_ENABLE_JIT_LTO // For JIT, we don't use the function pointers, so set them to nullptr // The free functions are called directly instead @@ -283,7 +301,12 @@ RAFT_KERNEL __launch_bounds__(1, 1) dataset_norms); #endif } +#endif // #ifndef BUILD_KERNEL +#ifndef BUILD_KERNEL +// The init_ function is not needed when building JIT fragments (BUILD_KERNEL is defined) +// It's only needed for non-JIT initialization. When BUILD_KERNEL is defined, we're building +// a JIT fragment and don't want this host function to be included. 
template + +// Note: We don't include sample_filter.cuh here because it's not needed for JIT +// The JIT version defines its own sample_filter function directly +// #include "../../../sample_filter.cuh" namespace cuvs::neighbors::cagra::detail { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh index 3ab26fd3bd..803653a700 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh @@ -55,7 +55,7 @@ template -RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_jit( +__global__ __launch_bounds__(1024, 1) void search_kernel_jit( IndexT* const result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] DistanceT* const result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] const DescriptorT* dataset_desc, // Concrete descriptor type from template @@ -79,6 +79,7 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_jit( SourceIndexT bitset_len, // Bitset length SourceIndexT original_nbits) // Original number of bits { + printf("IN THE KERNEL\n"); using DATA_T = DataT; using INDEX_T = IndexT; using DISTANCE_T = DistanceT; @@ -89,7 +90,9 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_jit( // CRITICAL DEBUG: Write to result buffer IMMEDIATELY to verify kernel is executing // Write a magic value that we can check on host - do this before ANY other code - if (threadIdx.x == 0 && blockIdx.x == 0 && blockIdx.y == 0) { + // Write from the first thread of the first block to maximize chance of execution + if (threadIdx.x == 0 && blockIdx.x == 0 && blockIdx.y == 0 && result_distances_ptr != nullptr && + result_indices_ptr != nullptr) { // Write magic value to first distance to verify kernel execution if (result_distances_ptr != nullptr) { *result_distances_ptr = static_cast(3735928559.0f); // 0xDEADBEEF as 
float diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh index 4f76c1b224..763d027e6d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -28,6 +28,8 @@ #include "../topk_by_radix.cuh" #include "../topk_for_cagra/topk.h" +#include + #include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 2998e0f3fc..4fb0025c0d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -204,6 +204,24 @@ void select_and_run_jit( auto params = make_fragment_key(); auto launcher = planner.get_launcher(); + if (!launcher) { + std::cerr << "[JIT] ERROR: Failed to get launcher - planner.get_launcher() returned null!" + << std::endl; + std::cerr.flush(); + RAFT_FAIL("Failed to get JIT launcher"); + } + + // Verify kernel handle is valid + cudaKernel_t kernel_handle = launcher->get_kernel(); + if (kernel_handle == nullptr) { + std::cerr << "[JIT] ERROR: Launcher has null kernel handle!" 
<< std::endl; + std::cerr.flush(); + RAFT_FAIL("JIT launcher has null kernel handle"); + } + std::cerr << "[JIT] Launcher obtained successfully, kernel handle: " << kernel_handle + << std::endl; + std::cerr.flush(); + uint32_t max_elements{}; if (result_buffer_size <= 64) { max_elements = 64; diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 87da77456b..5bd34da6f4 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -15,6 +15,8 @@ #include "topk_for_cagra/topk.h" // TODO replace with raft topk #include "utils.hpp" +#include + #include #include #include diff --git a/cpp/src/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh b/cpp/src/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh index 93b78b8177..e1153dce08 100644 --- a/cpp/src/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh +++ b/cpp/src/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh @@ -7,7 +7,7 @@ #include "../utils.hpp" #include "topk.h" -#include +#include #include #include diff --git a/cpp/src/neighbors/ivf_common_jit.cuh b/cpp/src/neighbors/ivf_common_jit.cuh new file mode 100644 index 0000000000..31cd96ecd1 --- /dev/null +++ b/cpp/src/neighbors/ivf_common_jit.cuh @@ -0,0 +1,24 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include // matrix::detail::select::warpsort::warp_sort_distributed + +namespace cuvs::neighbors::ivf::detail { + +/** + * Dummy block sort type used when Capacity is 0 in JIT kernels. + * This is a minimal header that doesn't include CUB to avoid EmptyKernel instantiation. + */ +template +struct dummy_block_sort_t { + using queue_t = raft::matrix::detail::select::warpsort:: + warp_sort_distributed; + template + __device__ dummy_block_sort_t(int k, Args...) 
{}; +}; + +} // namespace cuvs::neighbors::ivf::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh index 3a14fe8afd..e5e10720eb 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh @@ -5,7 +5,7 @@ #pragma once -#include "../../ivf_common.cuh" +#include "../../ivf_common_jit.cuh" #include From f1888a29ae43816137c1b97e15101eea78d33822 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Sun, 15 Feb 2026 01:32:18 +0000 Subject: [PATCH 093/158] more cleaning --- cpp/bench/ann/CMakeLists.txt | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 7a8177e4b8..8d254c0933 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -240,18 +240,6 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) ConfigureAnnBench( NAME CUVS_IVF_FLAT PATH src/cuvs/cuvs_benchmark.cu src/cuvs/cuvs_ivf_flat.cu LINKS cuvs ) - - # UDF benchmark - standalone executable - add_executable(CUVS_IVF_FLAT_UDF_BENCH src/cuvs/ivf_flat_udf_bench.cu) - target_link_libraries(CUVS_IVF_FLAT_UDF_BENCH PRIVATE cuvs) - set_target_properties( - CUVS_IVF_FLAT_UDF_BENCH - PROPERTIES CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - ) - add_dependencies(CUVS_ANN_BENCH_ALL CUVS_IVF_FLAT_UDF_BENCH) endif() if(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE) From b596e79fbee299a08993d898d7de362f630854ef Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Sun, 15 Feb 2026 01:34:30 +0000 Subject: [PATCH 094/158] merge cleanly --- cpp/CMakeLists.txt | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9e6778382b..a9938de7da 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -621,13 +621,6 @@ if(NOT BUILD_CPU_ONLY) INTERFACE "$" ) - target_include_directories( - cuvs_objs - PUBLIC "$" - "$" - INTERFACE "$" - ) - # Endian detection include(TestBigEndian) test_big_endian(BIG_ENDIAN) @@ -702,12 +695,7 @@ if(NOT BUILD_CPU_ONLY) $ $ $<$:CUDA::nvJitLink> - <<<<<<< - HEAD $<$:CUDA::nvrtc> - ======= - >>>>>>> - origin/main $<$:$> ) @@ -768,12 +756,7 @@ SECTIONS PRIVATE $ $<$:CUDA::nvJitLink> - <<<<<<< - HEAD $<$:CUDA::nvrtc> - ======= - >>>>>>> - origin/main $<$:CUDA::nvtx3> $ $ From 9c4980fa4d24ab3317ce46b5adbf039d078c423b Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Sun, 15 Feb 2026 01:38:38 +0000 Subject: [PATCH 095/158] add nvrtc as a dependency --- conda/environments/all_cuda-131_arch-aarch64.yaml | 1 + conda/environments/all_cuda-131_arch-x86_64.yaml | 1 + .../bench_ann_cuda-131_arch-aarch64.yaml | 1 + 
.../bench_ann_cuda-131_arch-x86_64.yaml | 1 + conda/environments/go_cuda-131_arch-aarch64.yaml | 1 + conda/environments/go_cuda-131_arch-x86_64.yaml | 1 + conda/environments/rust_cuda-131_arch-aarch64.yaml | 1 + conda/environments/rust_cuda-131_arch-x86_64.yaml | 1 + conda/recipes/libcuvs/recipe.yaml | 13 +++++++++++++ dependencies.yaml | 7 ++++--- python/libcuvs/pyproject.toml | 2 +- 11 files changed, 26 insertions(+), 4 deletions(-) diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index af6b71990e..71e530bf55 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -13,6 +13,7 @@ dependencies: - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc +- cuda-nvrtc-dev - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=13.0.1,<14.0 diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index 20fe9b82a2..937bae6576 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -13,6 +13,7 @@ dependencies: - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc +- cuda-nvrtc-dev - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=13.0.1,<14.0 diff --git a/conda/environments/bench_ann_cuda-131_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-131_arch-aarch64.yaml index b11035fcd6..65c27a96d8 100644 --- a/conda/environments/bench_ann_cuda-131_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-131_arch-aarch64.yaml @@ -12,6 +12,7 @@ dependencies: - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc +- cuda-nvrtc-dev - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=13.0.1,<14.0 diff --git a/conda/environments/bench_ann_cuda-131_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-131_arch-x86_64.yaml index 48d203af8b..cba1d508e7 100644 --- a/conda/environments/bench_ann_cuda-131_arch-x86_64.yaml +++ 
b/conda/environments/bench_ann_cuda-131_arch-x86_64.yaml @@ -12,6 +12,7 @@ dependencies: - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc +- cuda-nvrtc-dev - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=13.0.1,<14.0 diff --git a/conda/environments/go_cuda-131_arch-aarch64.yaml b/conda/environments/go_cuda-131_arch-aarch64.yaml index 135f6a88cc..4c5f1862c9 100644 --- a/conda/environments/go_cuda-131_arch-aarch64.yaml +++ b/conda/environments/go_cuda-131_arch-aarch64.yaml @@ -12,6 +12,7 @@ dependencies: - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc +- cuda-nvrtc-dev - cuda-nvtx-dev - cuda-profiler-api - cuda-version=13.1 diff --git a/conda/environments/go_cuda-131_arch-x86_64.yaml b/conda/environments/go_cuda-131_arch-x86_64.yaml index df6a779331..0bd7c0a2d3 100644 --- a/conda/environments/go_cuda-131_arch-x86_64.yaml +++ b/conda/environments/go_cuda-131_arch-x86_64.yaml @@ -12,6 +12,7 @@ dependencies: - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc +- cuda-nvrtc-dev - cuda-nvtx-dev - cuda-profiler-api - cuda-version=13.1 diff --git a/conda/environments/rust_cuda-131_arch-aarch64.yaml b/conda/environments/rust_cuda-131_arch-aarch64.yaml index 062cbc8ea0..2c6636e695 100644 --- a/conda/environments/rust_cuda-131_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-131_arch-aarch64.yaml @@ -11,6 +11,7 @@ dependencies: - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc +- cuda-nvrtc-dev - cuda-nvtx-dev - cuda-profiler-api - cuda-version=13.1 diff --git a/conda/environments/rust_cuda-131_arch-x86_64.yaml b/conda/environments/rust_cuda-131_arch-x86_64.yaml index 2b96d4a64e..dbe4367816 100644 --- a/conda/environments/rust_cuda-131_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-131_arch-x86_64.yaml @@ -11,6 +11,7 @@ dependencies: - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc +- cuda-nvrtc-dev - cuda-nvtx-dev - cuda-profiler-api - cuda-version=13.1 diff --git a/conda/recipes/libcuvs/recipe.yaml b/conda/recipes/libcuvs/recipe.yaml index abd3031a94..5d12000ded 100644 --- 
a/conda/recipes/libcuvs/recipe.yaml +++ b/conda/recipes/libcuvs/recipe.yaml @@ -75,6 +75,7 @@ cache: - if: cuda_major == "13" then: - libnvjitlink-dev + - cuda-nvrtc-dev - librmm =${{ minor_version }} - libraft-headers =${{ minor_version }} - nccl ${{ nccl_version }} @@ -124,6 +125,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink-dev + - cuda-nvrtc-dev run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - libraft-headers =${{ minor_version }} @@ -137,6 +139,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink + - cuda-nvrtc ignore_run_exports: by_name: - cuda-cudart @@ -153,6 +156,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink + - cuda-nvrtc about: homepage: ${{ load_from_file("python/libcuvs/pyproject.toml").project.urls.Homepage }} license: ${{ load_from_file("python/libcuvs/pyproject.toml").project.license }} @@ -192,6 +196,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink-dev + - cuda-nvrtc-dev run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - ${{ pin_subpackage("libcuvs-headers", exact=True) }} @@ -206,6 +211,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink + - cuda-nvrtc ignore_run_exports: by_name: - cuda-cudart @@ -222,6 +228,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink + - cuda-nvrtc about: homepage: ${{ load_from_file("python/libcuvs/pyproject.toml").project.urls.Homepage }} license: ${{ load_from_file("python/libcuvs/pyproject.toml").project.license }} @@ -259,6 +266,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink-dev + - cuda-nvrtc-dev run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - ${{ pin_subpackage("libcuvs-headers", exact=True) }} @@ -273,6 +281,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink + - cuda-nvrtc ignore_run_exports: by_name: - cuda-cudart @@ -286,6 +295,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink + - cuda-nvrtc - librmm - mkl - nccl @@ -426,6 
+436,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink-dev + - cuda-nvrtc-dev run: - ${{ pin_subpackage("libcuvs-headers", exact=True) }} - ${{ pin_subpackage("libcuvs", exact=True) }} @@ -439,6 +450,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink + - cuda-nvrtc ignore_run_exports: by_name: - cuda-cudart @@ -452,6 +464,7 @@ outputs: - if: cuda_major == "13" then: - libnvjitlink + - cuda-nvrtc - librmm - mkl - nccl diff --git a/dependencies.yaml b/dependencies.yaml index fca48befa0..0fac57de08 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -332,6 +332,7 @@ dependencies: cuda: "13.*" packages: - libnvjitlink-dev + - cuda-nvrtc-dev - matrix: cuda: "12.*" packages: @@ -343,12 +344,12 @@ dependencies: cuda: "12.*" use_cuda_wheels: "true" packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink]==12.* + - cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink,nvrtc]==12.* - matrix: cuda: "13.*" use_cuda_wheels: "true" packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink]==13.* + - cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink,nvrtc]==13.* - matrix: use_cuda_wheels: "false" packages: @@ -356,7 +357,7 @@ dependencies: # (just as a source of documentation, as this populates pyproject.toml in source control) - matrix: packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink]>=12,<14 + - cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink,nvrtc]>=12,<14 depends_on_cupy: common: - output_types: conda diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml index f43bc35dbf..c7e0a57515 100644 --- a/python/libcuvs/pyproject.toml +++ b/python/libcuvs/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = "Apache-2.0" requires-python = ">=3.11" dependencies = [ - "cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink]>=12,<14", + "cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink,nvrtc]>=12,<14", "libraft==26.4.*,>=0.0.0a0", "librmm==26.4.*,>=0.0.0a0", ] # This list was 
generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From f27eeb21797314db52098ca0e0d1a241da99d607 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Sun, 15 Feb 2026 02:37:23 +0000 Subject: [PATCH 096/158] fix build errors --- cpp/include/cuvs/neighbors/ivf_flat.hpp | 8 ++++---- cpp/src/detail/jit_lto/FragmentEntry.cu | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index c9fc02a696..6c74685710 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -3076,7 +3076,7 @@ struct point { storage_type data_; - __device__ __host__ point() = default; + point() = default; __device__ __host__ explicit point(storage_type d) : data_(d) {} __device__ __forceinline__ storage_type raw() const { return data_; } @@ -3117,7 +3117,7 @@ struct metric_interface { using point_type = point; virtual __device__ void operator()(AccT& acc, point_type x, point_type y) = 0; - virtual __device__ ~metric_interface() = default; + virtual ~metric_interface() = default; }; // ============================================================ @@ -3225,7 +3225,7 @@ struct point { storage_type data_; - __device__ __host__ point() = default; + point() = default; __device__ __host__ explicit point(storage_type d) : data_(d) {} __device__ __forceinline__ storage_type raw() const { return data_; } @@ -3265,7 +3265,7 @@ struct metric_interface { using point_type = point; virtual __device__ void operator()(AccT& acc, point_type x, point_type y) = 0; - virtual __device__ ~metric_interface() = default; + virtual ~metric_interface() = default; }; )"; diff --git a/cpp/src/detail/jit_lto/FragmentEntry.cu b/cpp/src/detail/jit_lto/FragmentEntry.cu index afaca5d5da..84caa207d5 100644 --- a/cpp/src/detail/jit_lto/FragmentEntry.cu +++ b/cpp/src/detail/jit_lto/FragmentEntry.cu @@ -26,7 +26,6 @@ 
bool FatbinFragmentEntry::add_to(nvJitLinkHandle& handle) const check_nvjitlink_result(handle, result); return true; } -<<<<<<< HEAD NVRTCFragmentEntry::NVRTCFragmentEntry(std::string const& key, std::unique_ptr&& program, @@ -43,5 +42,3 @@ bool NVRTCFragmentEntry::add_to(nvJitLinkHandle& handle) const return true; } -======= ->>>>>>> origin/main From bc5c90ed841d6ff1966d6b8dadd8a3388080fd30 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Sun, 15 Feb 2026 02:38:50 +0000 Subject: [PATCH 097/158] guard udf use --- cpp/include/cuvs/neighbors/ivf_flat.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index 6c74685710..f5b12b7445 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -3052,6 +3052,7 @@ void recompute_internal_state(const raft::resources& res, index Date: Sun, 15 Feb 2026 03:45:33 +0000 Subject: [PATCH 098/158] analyzing cubins --- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 23 +++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 482d61f776..8fa6f89273 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -6,6 +6,7 @@ #include "nvjitlink_checker.hpp" #include +#include #include #include #include @@ -156,12 +157,24 @@ std::shared_ptr AlgorithmPlanner::build() std::replace(cubin_path.begin(), cubin_path.end(), ' ', '_'); FILE* f = fopen(cubin_path.c_str(), "wb"); if (f) { - fwrite(cubin.get(), 1, cubin_size, f); + size_t written = fwrite(cubin.get(), 1, cubin_size, f); fclose(f); - std::cerr << "[JIT] Saved linked cubin to: " << cubin_path << " (size: " << cubin_size - << " bytes)" << std::endl; - std::cerr << "[JIT] Run: cuobjdump --dump-elf-symbols " << cubin_path - << " to see kernel symbols" << std::endl; + if (written == cubin_size) { + 
std::cerr << "[JIT] =========================================" << std::endl; + std::cerr << "[JIT] Saved linked cubin to: " << cubin_path << " (size: " << cubin_size + << " bytes)" << std::endl; + std::cerr << "[JIT] Run: cuobjdump --dump-elf-symbols " << cubin_path + << " to see kernel symbols" << std::endl; + std::cerr << "[JIT] =========================================" << std::endl; + std::cerr.flush(); + } else { + std::cerr << "[JIT] WARNING: Failed to write full cubin (wrote " << written << " of " + << cubin_size << " bytes)" << std::endl; + std::cerr.flush(); + } + } else { + std::cerr << "[JIT] WARNING: Failed to open cubin file for writing: " << cubin_path + << " (errno: " << errno << ")" << std::endl; std::cerr.flush(); } From 55c32f4711c6949c9c6b3c4e7337ab9c7ec16365 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Sun, 15 Feb 2026 03:50:53 +0000 Subject: [PATCH 099/158] compiler definition on headers --- cpp/CMakeLists.txt | 18 ++++++++++-------- .../detail/jit_lto/NVRTCLTOFragmentCompiler.cu | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a9938de7da..d934d0e5a3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -213,6 +213,13 @@ endif() # ################################################################################################## # * cuvs --------------------------------------------------------------------- if(NOT BUILD_CPU_ONLY) + set(JIT_LTO_TARGET_ARCHITECTURE "") + set(JIT_LTO_COMPILATION OFF) + set(JIT_LTO_FILES "") + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + set(JIT_LTO_TARGET_ARCHITECTURE "75-real") + set(JIT_LTO_COMPILATION ON) + endif() add_library(cuvs_cpp_headers INTERFACE) add_library(cuvs::cuvs_cpp_headers ALIAS cuvs_cpp_headers) @@ -222,6 +229,9 @@ if(NOT BUILD_CPU_ONLY) "$" "$" ) + target_compile_definitions( + cuvs_cpp_headers INTERFACE $<$:CUVS_ENABLE_JIT_LTO> + ) target_link_libraries(cuvs_cpp_headers INTERFACE raft::raft rmm::rmm) 
add_library( @@ -343,14 +353,6 @@ if(NOT BUILD_CPU_ONLY) ) endif() - set(JIT_LTO_TARGET_ARCHITECTURE "") - set(JIT_LTO_COMPILATION OFF) - set(JIT_LTO_FILES "") - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) - set(JIT_LTO_TARGET_ARCHITECTURE "75-real") - set(JIT_LTO_COMPILATION ON) - endif() - if(JIT_LTO_COMPILATION) # Generate interleaved scan kernel files at build time include(cmake/modules/generate_jit_lto_kernels.cmake) diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index 8bae7a7a03..bc4e1aa4cc 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include +#include #include #include From 18664751ea2e1e0acf6ae1b8759ce9be08dac165 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Sun, 15 Feb 2026 04:21:21 +0000 Subject: [PATCH 100/158] guard udf test --- cpp/tests/CMakeLists.txt | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 02885549c2..cc89f0bd26 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -131,12 +131,14 @@ ConfigureTest( PERCENT 100 ) -ConfigureTest( - NAME NEIGHBORS_ANN_IVF_FLAT_UDF_TEST - PATH neighbors/ann_ivf_flat/test_udf.cu - GPUS 1 - PERCENT 100 -) +if(JIT_LTO_COMPILATION) + ConfigureTest( + NAME NEIGHBORS_ANN_IVF_FLAT_UDF_TEST + PATH neighbors/ann_ivf_flat/test_udf.cu + GPUS 1 + PERCENT 100 + ) +endif() ConfigureTest( NAME NEIGHBORS_ANN_IVF_PQ_TEST From c4191732d01426a30cfb3edb4e62e8e2390a7bba Mon Sep 17 00:00:00 2001 From: divyegala Date: Sun, 15 Feb 2026 20:48:03 +0000 Subject: [PATCH 101/158] remove --- cpp/src/cluster/detail/kmeans_common.cuh | 5 ++- .../detail/sparse/coo_spmv_kernel.cuh | 2 +- cpp/src/distance/detail/sparse/utils.cuh | 4 +-- cpp/src/distance/fused_distance_nn-inl.cuh | 4 +-- 
.../detail/cagra/topk_for_cagra/topk_core.cuh | 2 +- cpp/src/neighbors/detail/fused_l2_knn.cuh | 4 +-- cpp/src/neighbors/detail/knn_utils.cuh | 3 +- .../neighbors/detail/vamana/greedy_search.cuh | 4 +-- .../neighbors/detail/vamana/robust_prune.cuh | 3 +- cpp/src/neighbors/ivf_common.cu | 21 +++++++++++- cpp/src/neighbors/ivf_common.cuh | 32 ++++++------------- cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh | 1 - cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh | 2 +- cpp/src/neighbors/scann/detail/scann_avq.cuh | 6 ++-- .../neighbors/detail/cross_component_nn.cuh | 3 +- cpp/src/stats/detail/silhouette_score.cuh | 4 +-- .../sparse/neighbors/cross_component_nn.cu | 2 -- 17 files changed, 54 insertions(+), 48 deletions(-) diff --git a/cpp/src/cluster/detail/kmeans_common.cuh b/cpp/src/cluster/detail/kmeans_common.cuh index ea93b764f9..c5db4a4cfa 100644 --- a/cpp/src/cluster/detail/kmeans_common.cuh +++ b/cpp/src/cluster/detail/kmeans_common.cuh @@ -31,7 +31,10 @@ #include #include -#include +#include +#include +#include +#include #include #include #include diff --git a/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh b/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh index 857d87fa0d..b60a1dd01d 100644 --- a/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh +++ b/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace cuvs { namespace distance { diff --git a/cpp/src/distance/detail/sparse/utils.cuh b/cpp/src/distance/detail/sparse/utils.cuh index 51ac702c83..50213d0f55 100644 --- a/cpp/src/distance/detail/sparse/utils.cuh +++ b/cpp/src/distance/detail/sparse/utils.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -7,7 +7,7 @@ #include -#include +#include #include #include diff --git a/cpp/src/distance/fused_distance_nn-inl.cuh b/cpp/src/distance/fused_distance_nn-inl.cuh index cdf6b055dc..3fa80a9b60 100644 --- a/cpp/src/distance/fused_distance_nn-inl.cuh +++ b/cpp/src/distance/fused_distance_nn-inl.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -14,7 +14,7 @@ #include #include -#include +#include #include diff --git a/cpp/src/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh b/cpp/src/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh index 93b78b8177..e1153dce08 100644 --- a/cpp/src/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh +++ b/cpp/src/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh @@ -7,7 +7,7 @@ #include "../utils.hpp" #include "topk.h" -#include +#include #include #include diff --git a/cpp/src/neighbors/detail/fused_l2_knn.cuh b/cpp/src/neighbors/detail/fused_l2_knn.cuh index 731744db8d..6d555c54e7 100644 --- a/cpp/src/neighbors/detail/fused_l2_knn.cuh +++ b/cpp/src/neighbors/detail/fused_l2_knn.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -8,7 +8,7 @@ #include #include -#include +#include #include diff --git a/cpp/src/neighbors/detail/knn_utils.cuh b/cpp/src/neighbors/detail/knn_utils.cuh index d6de388ee5..72df390f8e 100644 --- a/cpp/src/neighbors/detail/knn_utils.cuh +++ b/cpp/src/neighbors/detail/knn_utils.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -9,7 +9,6 @@ #include -#include #include #include diff --git a/cpp/src/neighbors/detail/vamana/greedy_search.cuh b/cpp/src/neighbors/detail/vamana/greedy_search.cuh index 717e389c32..4e71c1189c 100644 --- a/cpp/src/neighbors/detail/vamana/greedy_search.cuh +++ b/cpp/src/neighbors/detail/vamana/greedy_search.cuh @@ -1,11 +1,11 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once -#include +#include #include "macros.cuh" #include "priority_queue.cuh" diff --git a/cpp/src/neighbors/detail/vamana/robust_prune.cuh b/cpp/src/neighbors/detail/vamana/robust_prune.cuh index 9fe3c01a8c..31fb6d589f 100644 --- a/cpp/src/neighbors/detail/vamana/robust_prune.cuh +++ b/cpp/src/neighbors/detail/vamana/robust_prune.cuh @@ -1,11 +1,10 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once -#include #include #include diff --git a/cpp/src/neighbors/ivf_common.cu b/cpp/src/neighbors/ivf_common.cu index 8df5a58cd7..9fb7b59b0f 100644 --- a/cpp/src/neighbors/ivf_common.cu +++ b/cpp/src/neighbors/ivf_common.cu @@ -8,7 +8,8 @@ #include #include -#include +#include +#include namespace cuvs::neighbors::ivf::detail { @@ -72,4 +73,22 @@ void calc_chunk_indices::configured::operator()(const uint32_t* cluster_sizes, RAFT_CUDA_TRY(cudaLaunchKernel(kernel, grid_dim, block_dim, args, 0, stream)); } +// Helper function to sort cluster sizes using CUB, extracted from template to avoid +// including cub/device/* in the header file +void sort_cluster_sizes_descending(uint32_t* input, + uint32_t* output, + uint32_t n_lists, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* tmp_res) +{ + int begin_bit = 0; + int end_bit = sizeof(uint32_t) * 8; + size_t cub_workspace_size = 0; + cub::DeviceRadixSort::SortKeysDescending( + nullptr, cub_workspace_size, input, output, n_lists, begin_bit, end_bit, stream); + rmm::device_buffer cub_workspace(cub_workspace_size, stream, tmp_res); + cub::DeviceRadixSort::SortKeysDescending( + cub_workspace.data(), cub_workspace_size, input, output, n_lists, begin_bit, end_bit, stream); +} + } // namespace cuvs::neighbors::ivf::detail diff --git a/cpp/src/neighbors/ivf_common.cuh b/cpp/src/neighbors/ivf_common.cuh index d7a5da6e4e..ad3dc86d0d 100644 --- a/cpp/src/neighbors/ivf_common.cuh +++ b/cpp/src/neighbors/ivf_common.cuh @@ -9,10 +9,15 @@ #include #include // matrix::detail::select::warpsort::warp_sort_distributed -#include - namespace cuvs::neighbors::ivf::detail { +// Forward declaration of helper function to avoid including cub/device/* in header +void sort_cluster_sizes_descending(uint32_t* input, + uint32_t* output, + uint32_t n_lists, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* tmp_res); + /** * Default value returned by `search` when the 
`n_probes` is too small and top-k is too large. * One may encounter it if the combined size of probed clusters is smaller than the requested @@ -247,26 +252,9 @@ void recompute_internal_state(const raft::resources& res, Index& index) } // Sort the cluster sizes in the descending order. - int begin_bit = 0; - int end_bit = sizeof(uint32_t) * 8; - size_t cub_workspace_size = 0; - cub::DeviceRadixSort::SortKeysDescending(nullptr, - cub_workspace_size, - index.list_sizes().data_handle(), - sorted_sizes.data(), - index.n_lists(), - begin_bit, - end_bit, - stream); - rmm::device_buffer cub_workspace(cub_workspace_size, stream, tmp_res); - cub::DeviceRadixSort::SortKeysDescending(cub_workspace.data(), - cub_workspace_size, - index.list_sizes().data_handle(), - sorted_sizes.data(), - index.n_lists(), - begin_bit, - end_bit, - stream); + // Use helper function to avoid including cub/device/* in this header + sort_cluster_sizes_descending( + index.list_sizes().data_handle(), sorted_sizes.data(), index.n_lists(), stream, tmp_res); // copy the results to CPU std::vector sorted_sizes_host(index.n_lists()); raft::copy(sorted_sizes_host.data(), sorted_sizes.data(), index.n_lists(), stream); diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh index 9bbae06ccd..60dd868bc7 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh @@ -19,7 +19,6 @@ #include #include -#include #include namespace cuvs::neighbors::ivf_pq::detail { diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh index 556e1991bf..095d706569 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh @@ -42,7 +42,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/src/neighbors/scann/detail/scann_avq.cuh b/cpp/src/neighbors/scann/detail/scann_avq.cuh index 3e8238c5e2..6c4797918e 100644 --- 
a/cpp/src/neighbors/scann/detail/scann_avq.cuh +++ b/cpp/src/neighbors/scann/detail/scann_avq.cuh @@ -1,9 +1,11 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ -#include +#include +#include +#include #include #include #include diff --git a/cpp/src/sparse/neighbors/detail/cross_component_nn.cuh b/cpp/src/sparse/neighbors/detail/cross_component_nn.cuh index 6c90675eb9..4c0462b6fe 100644 --- a/cpp/src/sparse/neighbors/detail/cross_component_nn.cuh +++ b/cpp/src/sparse/neighbors/detail/cross_component_nn.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -26,7 +26,6 @@ #include -#include #include #include #include diff --git a/cpp/src/stats/detail/silhouette_score.cuh b/cpp/src/stats/detail/silhouette_score.cuh index d5dac46381..e6a426b25f 100644 --- a/cpp/src/stats/detail/silhouette_score.cuh +++ b/cpp/src/stats/detail/silhouette_score.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -17,7 +17,7 @@ #include -#include +#include #include diff --git a/cpp/tests/sparse/neighbors/cross_component_nn.cu b/cpp/tests/sparse/neighbors/cross_component_nn.cu index 298b071b6e..fdb601807d 100644 --- a/cpp/tests/sparse/neighbors/cross_component_nn.cu +++ b/cpp/tests/sparse/neighbors/cross_component_nn.cu @@ -29,8 +29,6 @@ #include -#include - #include #include From 04cc166b316a5d71936a4118ad3afb4f699ed223 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sun, 15 Feb 2026 21:18:06 +0000 Subject: [PATCH 102/158] missing include --- cpp/tests/cluster/linkage.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/tests/cluster/linkage.cu b/cpp/tests/cluster/linkage.cu index 10d6c58fbd..43824b16ad 100644 --- a/cpp/tests/cluster/linkage.cu +++ b/cpp/tests/cluster/linkage.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -13,6 +13,8 @@ #include #include +#include + #include #include From 1113afc2c3ab925cf7511cdba8a035effd34892b Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Sun, 15 Feb 2026 23:40:23 +0000 Subject: [PATCH 103/158] cleaning up --- cpp/cmake/thirdparty/get_raft.cmake | 6 +- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 138 ++--------------- .../cagra/compute_distance_standard-impl.cuh | 14 -- .../jit_lto_kernels/device_common_jit.cuh | 15 +- .../search_multi_cta_kernel_jit.cuh | 81 +--------- .../search_multi_kernel_jit.cuh | 1 - .../search_single_cta_kernel_jit.cuh | 1 - .../search_multi_cta_kernel_launcher_jit.cuh | 141 +----------------- .../search_multi_kernel_launcher_jit.cuh | 1 - .../search_single_cta_kernel_launcher_jit.cuh | 61 -------- cpp/src/neighbors/ivf_common_jit.cuh | 24 --- .../ivf_flat_interleaved_scan_kernel.cuh | 2 +- 12 files changed, 23 insertions(+), 462 deletions(-) delete mode 100644 cpp/src/neighbors/ivf_common_jit.cuh diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 8ecf3686be..ba7f68f09d 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -60,8 +60,8 @@ endfunction() # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${RAFT_VERSION}.00 - FORK ${RAFT_FORK} - PINNED_TAG ${RAFT_PINNED_TAG} + FORK divyegala + PINNED_TAG unneeded-cccl-includes ENABLE_MNMG_DEPENDENCIES OFF ENABLE_NVTX OFF BUILD_STATIC_DEPS ${CUVS_STATIC_RAPIDS_LIBRARIES} diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 8fa6f89273..0140518158 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -7,15 +7,12 @@ #include #include -#include #include #include -#include #include #include #include #include -#include #include #include @@ -28,16 +25,9 @@ void AlgorithmPlanner::add_entrypoint() { - std::cerr << "[JIT] AlgorithmPlanner::add_entrypoint - looking for entrypoint: " - << this->entrypoint << std::endl; - std::cerr.flush(); auto entrypoint_fragment = fragment_database().get_fragment(this->entrypoint); if (entrypoint_fragment == nullptr) { - std::cerr << "[JIT] ERROR: entrypoint fragment is NULL for: " << this->entrypoint << std::endl; - std::cerr.flush(); - } else { - std::cerr << "[JIT] Found entrypoint fragment for: " << this->entrypoint << std::endl; - std::cerr.flush(); + RAFT_FAIL("Entrypoint fragment is NULL for: %s", this->entrypoint.c_str()); } this->fragments.push_back(entrypoint_fragment); } @@ -66,29 +56,10 @@ std::shared_ptr AlgorithmPlanner::get_launcher() static std::mutex cache_mutex; std::lock_guard lock(cache_mutex); - std::cerr << "[JIT] AlgorithmPlanner::get_launcher called for entrypoint: " << this->entrypoint - << std::endl; - std::cerr.flush(); if (launchers.count(launch_key) == 0) { add_entrypoint(); add_device_functions(); - std::string log_message = - "JIT compiling launcher for entrypoint: " + this->entrypoint + " and device functions: "; - for (const auto& 
device_function : this->device_functions) { - log_message += device_function + ","; - } - log_message.pop_back(); - std::cerr << "[JIT] " << log_message << std::endl; - std::cerr.flush(); - - // Time the first-time JIT compilation - auto start_time = std::chrono::high_resolution_clock::now(); launchers[launch_key] = this->build(); - auto end_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end_time - start_time); - std::cerr << "[JIT] Compilation completed in " << duration.count() - << " ms for entrypoint: " << this->entrypoint << std::endl; - std::cerr.flush(); } else { RAFT_LOG_DEBUG("Using cached JIT launcher for entrypoint: %s", this->entrypoint.c_str()); } @@ -112,30 +83,13 @@ std::shared_ptr AlgorithmPlanner::build() auto result = nvJitLinkCreate(&handle, 3, lopts); check_nvjitlink_result(handle, result); - std::cerr << "[JIT] AlgorithmPlanner::build - Adding " << this->fragments.size() - << " fragments to linker:" << std::endl; - for (size_t i = 0; i < this->fragments.size(); ++i) { - std::cerr << "[JIT] Fragment [" << i << "] pointer: " << (void*)this->fragments[i] - << std::endl; - if (i == 0) { - std::cerr << "[JIT] (Entrypoint fragment)" << std::endl; - } else { - std::cerr << "[JIT] (Device function fragment: " << this->device_functions[i - 1] << ")" - << std::endl; - } - } - std::cerr.flush(); - for (auto& frag : this->fragments) { frag->add_to(handle); } - // Call to nvJitLinkComplete causes linker to link together all the LTO-IR - // modules perform any optimizations and generate cubin from it. 
result = nvJitLinkComplete(handle); check_nvjitlink_result(handle, result); - // get cubin from nvJitLink size_t cubin_size; result = nvJitLinkGetLinkedCubinSize(handle, &cubin_size); check_nvjitlink_result(handle, result); @@ -147,7 +101,7 @@ std::shared_ptr AlgorithmPlanner::build() result = nvJitLinkDestroy(&handle); RAFT_EXPECTS(result == NVJITLINK_SUCCESS, "nvJitLinkDestroy failed"); - // Debug: Save cubin to disk for inspection with cuobjdump + // Save cubin to disk for inspection with cuobjdump std::string cubin_path = "/tmp/linked_cubin_" + this->entrypoint + ".cubin"; // Sanitize filename (replace special chars) std::replace(cubin_path.begin(), cubin_path.end(), '/', '_'); @@ -179,37 +133,20 @@ std::shared_ptr AlgorithmPlanner::build() } // cubin is linked, so now load it - // NOTE: cudaLibrary_t does not need to be freed explicitly cudaLibrary_t library; RAFT_CUDA_TRY( cudaLibraryLoadData(&library, cubin.get(), nullptr, nullptr, 0, nullptr, nullptr, 0)); - // The entrypoint fragment should contain exactly one __global__ kernel - // Device functions (__device__) don't show up in kernel enumeration - // But we might have kernels from multiple fragments if they were linked together - std::cerr << "[JIT] AlgorithmPlanner::build - Fragments added: " << this->fragments.size() - << " (entrypoint + " << this->device_functions.size() << " device functions)" - << std::endl; - std::cerr << "[JIT] AlgorithmPlanner::build - Entrypoint: " << this->entrypoint << std::endl; - std::cerr.flush(); - - // Enumerate kernels - we expect only 1 kernel from the entrypoint fragment - // Device function fragments contain only __device__ functions, not __global__ kernels - // So they shouldn't show up in kernel enumeration - // First, query the actual number of kernels using cudaLibraryGetKernelCount (runtime API) + // Enumerate kernels unsigned int kernel_count = 0; cudaError_t cuda_result = cudaLibraryGetKernelCount(&kernel_count, library); if (cuda_result != cudaSuccess) { - 
std::cerr << "[JIT] ERROR: cudaLibraryGetKernelCount failed with error: " << cuda_result << " (" - << cudaGetErrorString(cuda_result) << ")" << std::endl; - std::cerr.flush(); RAFT_FAIL("cudaLibraryGetKernelCount failed with error: %d (%s)", cuda_result, cudaGetErrorString(cuda_result)); } - std::cerr << "[JIT] AlgorithmPlanner::build - Actual kernel count in library: " << kernel_count - << std::endl; + std::cerr << "[JIT] Kernel count in library: " << kernel_count << std::endl; std::cerr.flush(); if (kernel_count == 0) { @@ -217,85 +154,30 @@ std::shared_ptr AlgorithmPlanner::build() } if (kernel_count > 1) { - std::cerr << "[JIT] WARNING: Found " << kernel_count - << " kernels in library! This might be the issue - we're using kernel [0]" + std::cerr << "[JIT] WARNING: Found " << kernel_count << " kernels in library! Using kernel [0]" << std::endl; - std::cerr << "[JIT] Entrypoint we're looking for: " << this->entrypoint << std::endl; - std::cerr << "[JIT] This suggests multiple kernels are being linked together!" 
<< std::endl; std::cerr.flush(); } - // Now allocate the right size and enumerate std::unique_ptr kernels{new cudaKernel_t[kernel_count]}; unsigned int kernel_count_verify = kernel_count; RAFT_CUDA_TRY(cudaLibraryEnumerateKernels(kernels.get(), kernel_count_verify, library)); if (kernel_count_verify != kernel_count) { - std::cerr << "[JIT] WARNING: Kernel count mismatch - cudaLibraryGetKernelCount returned " - << kernel_count << " but cudaLibraryEnumerateKernels returned " << kernel_count_verify - << std::endl; - std::cerr.flush(); - } - - // With runtime API, we can't get kernel names directly - // If there are multiple kernels, we'll use the first one - // The entrypoint fragment should be added first, so its kernel should be at index 0 - if (kernel_count > 1) { - std::cerr << "[JIT] WARNING: Multiple kernels found (" << kernel_count << "), using kernel [0]" - << std::endl; - std::cerr << "[JIT] Entrypoint we're looking for: " << this->entrypoint << std::endl; - std::cerr << "[JIT] This suggests multiple kernels are being linked together!" << std::endl; - std::cerr << "[JIT] Fragments added:" << std::endl; - for (size_t i = 0; i < this->fragments.size(); ++i) { - std::cerr << "[JIT] Fragment [" << i << "]: "; - if (i == 0) { - std::cerr << "Entrypoint fragment" << std::endl; - } else { - std::cerr << "Device function fragment: " << this->device_functions[i - 1] << std::endl; - } - } - std::cerr.flush(); + RAFT_FAIL( + "Kernel count mismatch: cudaLibraryGetKernelCount returned %u but " + "cudaLibraryEnumerateKernels returned %u", + kernel_count, + kernel_count_verify); } - // When multiple kernels are found, one is often CUB's EmptyKernel (a weak symbol - // instantiated when CUB headers are included). The entrypoint fragment is added first, - // so its kernel should be at index 0. However, the order is not guaranteed - sometimes - // CUB's EmptyKernel is at index 0, sometimes at index 1. - // Strategy: Try kernel[0] first. 
If it's EmptyKernel, it will be a no-op and won't affect - // results. We can't distinguish EmptyKernel from our kernel without names, so we'll use kernel[0] - // and rely on the fact that EmptyKernel does nothing. unsigned int kernel_index = 0; - if (kernel_count > 1) { - std::cerr << "[JIT] WARNING: Found " << kernel_count - << " kernels (CUB EmptyKernel may be present). Using kernel [0]" << std::endl; - std::cerr << "[JIT] If kernel [0] is EmptyKernel, results will be incorrect" << std::endl; - std::cerr << "[JIT] Entrypoint fragment is added first, so kernel [0] should be correct" - << std::endl; - std::cerr.flush(); - } auto kernel = kernels.release()[kernel_index]; - // Validate the kernel pointer is reasonable (not null, not obviously garbage) if (kernel == nullptr) { RAFT_FAIL("Entrypoint kernel is NULL for: %s", this->entrypoint.c_str()); } - void* kernel_ptr = (void*)kernel; - uintptr_t ptr_val = (uintptr_t)kernel_ptr; - // Check if pointer looks valid (not null, not obviously ASCII string data) - // On 64-bit systems, valid pointers are typically in the range 0x1000 to 0x7fffffffffff - // but kernel pointers from CUDA driver API can be in higher address ranges - // So we only check for null and obviously invalid values (too small) - if (ptr_val < 0x1000) { - RAFT_FAIL("Entrypoint kernel pointer looks invalid (0x%lx) - too small for: %s", - ptr_val, - this->entrypoint.c_str()); - } - - std::cerr << "[JIT] AlgorithmPlanner::build - Using kernel [0] as entrypoint, pointer: " - << kernel_ptr << std::endl; - std::cerr.flush(); - return std::make_shared(kernel); } diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh index 3e52991c3a..3e3c83f5c0 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh @@ -207,7 +207,6 @@ RAFT_DEVICE_INLINE_FUNCTION auto 
compute_distance_standard_worker( if (k >= dim) break; #pragma unroll for (uint32_t v = 0; v < vlen; v++) { - // Note this loop can go above the dataset_dim for padded arrays. This is not a problem // because: // - Above the last element (dataset_dim-1), the query array is filled with zeros. // - The data buffer has to be also padded with zeros. @@ -265,19 +264,6 @@ RAFT_KERNEL __launch_bounds__(1, 1) standard_dataset_descriptor_t; using base_type = typename desc_type::base_type; - // Debug: Verify we're constructing the right type - if (threadIdx.x == 0 && blockIdx.x == 0) { - printf( - "[INIT KERNEL] Constructing desc_type with: Metric=%d, TeamSize=%u, DatasetBlockDim=%u, " - "DataT=float, IndexT=uint32_t, DistanceT=float\n", - static_cast(Metric), - TeamSize, - DatasetBlockDim); - printf("[INIT KERNEL] desc_type::kTeamSize=%u, desc_type::kDatasetBlockDim=%u\n", - desc_type::kTeamSize, - desc_type::kDatasetBlockDim); - } - #ifdef CUVS_ENABLE_JIT_LTO // For JIT, we don't use the function pointers, so set them to nullptr // The free functions are called directly instead diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh index 516df56ca6..dbec070ea0 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh @@ -130,13 +130,6 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( } result_distances_ptr[i] = best_norm2_team_local; result_indices_ptr[i] = best_index_team_local; - // Debug: print first few random node distances - if (i < 3 && block_id == 0) { - printf("JIT random: i=%u idx=%u dist=%.6f\n", - i, - best_index_team_local, - (float)best_norm2_team_local); - } } } } @@ -247,13 +240,7 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( __syncwarp(); // Store the distance - if (valid_i && lead_lane) { - 
result_child_distances_ptr[j] = child_dist; - // Debug: print first few child node distances - if (j < 3 && threadIdx.x < 32) { - printf("JIT child: j=%u idx=%u dist=%.6f\n", j, child_id, (float)child_dist); - } - } + if (valid_i && lead_lane) { result_child_distances_ptr[j] = child_dist; } } } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh index 803653a700..783c0fb9c9 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh @@ -9,13 +9,6 @@ #include "../device_common.hpp" #include "../hashmap.hpp" #include "../utils.hpp" -// Note: -// - Extern function declarations (setup_workspace_standard, compute_distance_standard, etc.) use -// types from compute_distance-ext.cuh -// - Type definitions (standard_dataset_descriptor_t, etc.) are in the -impl.cuh files, included by -// the .cu.in files for template instantiation -// - pickup_next_parent and topk_by_bitonic_sort_wrapper_* are included via -// search_multi_cta_helpers.cuh in the .cu.in file #include // For DistanceType enum #include // For raft::upper_bound @@ -23,10 +16,10 @@ #include #include -#include // For std::is_same_v, std::true_type, std::false_type +#include #ifdef _CLK_BREAKDOWN -#include // For printf in debug code +#include #endif // Include extern function declarations before namespace so they're available to kernel definitions @@ -77,9 +70,8 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( uint32_t* const num_executed_iterations, /* stats */ uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) SourceIndexT bitset_len, // Bitset length - SourceIndexT original_nbits) // Original number of bits + SourceIndexT original_nbits) { - printf("IN THE KERNEL\n"); using DATA_T = DataT; using INDEX_T = IndexT; using DISTANCE_T = 
DistanceT; @@ -88,55 +80,6 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( return source_indices_ptr == nullptr ? static_cast(x) : source_indices_ptr[x]; }; - // CRITICAL DEBUG: Write to result buffer IMMEDIATELY to verify kernel is executing - // Write a magic value that we can check on host - do this before ANY other code - // Write from the first thread of the first block to maximize chance of execution - if (threadIdx.x == 0 && blockIdx.x == 0 && blockIdx.y == 0 && result_distances_ptr != nullptr && - result_indices_ptr != nullptr) { - // Write magic value to first distance to verify kernel execution - if (result_distances_ptr != nullptr) { - *result_distances_ptr = static_cast(3735928559.0f); // 0xDEADBEEF as float - } - // Also write to indices - if (result_indices_ptr != nullptr) { *result_indices_ptr = static_cast(0xCAFEBABE); } - - // Debug: Check if descriptor runtime values match kernel compile-time constants - // The kernel uses DescriptorT::kTeamSize and DescriptorT::kDatasetBlockDim (compile-time) - // The descriptor object has runtime values that should match - uint32_t desc_team_size_bitshift = dataset_desc->team_size_bitshift(); - uint32_t desc_team_size_actual = 1u << desc_team_size_bitshift; - uint32_t kernel_team_size = DescriptorT::kTeamSize; - uint32_t kernel_dataset_block_dim = DescriptorT::kDatasetBlockDim; - - // For standard descriptors, dataset_block_dim is stored in args.extra_word1 as 'ld' - // For VPQ descriptors, it's a compile-time constant only - uint32_t desc_dataset_block_dim = kernel_dataset_block_dim; // Use compile-time constant - if constexpr (!has_kpq_bits_v) { - // Standard descriptor - can read from args.ld - desc_dataset_block_dim = DescriptorT::ld(dataset_desc->args); - } - - printf("JIT KERNEL EXECUTING: threadIdx=0, wrote magic values\n"); - printf("JIT KERNEL: Descriptor team_size (from bitshift): %u, Kernel kTeamSize: %u\n", - desc_team_size_actual, - kernel_team_size); - printf("JIT KERNEL: 
Descriptor dataset_block_dim: %u, Kernel kDatasetBlockDim: %u\n", - desc_dataset_block_dim, - kernel_dataset_block_dim); - if (desc_team_size_actual != kernel_team_size || - desc_dataset_block_dim != kernel_dataset_block_dim) { - printf( - "JIT KERNEL ERROR: Parameter mismatch! team_size: %u vs %u, dataset_block_dim: %u vs %u\n", - desc_team_size_actual, - kernel_team_size, - desc_dataset_block_dim, - kernel_dataset_block_dim); - } else { - printf("JIT KERNEL: Parameters match correctly\n"); - } - } - __syncthreads(); - const auto num_queries = gridDim.y; const auto query_id = blockIdx.y; const auto num_cta_per_query = gridDim.x; @@ -394,13 +337,6 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( // Output search results (1st warp only). if (threadIdx.x < 32) { - // Debug: print buffer contents before output - if (query_id == 0 && cta_id == 0 && threadIdx.x < 5) { - printf("JIT pre-output: i=%u idx=%u dist=%.6f\n", - threadIdx.x, - result_indices_buffer[threadIdx.x], - (float)result_distances_buffer[threadIdx.x]); - } uint32_t offset = 0; for (uint32_t i = threadIdx.x; i < result_buffer_size_32; i += 32) { INDEX_T index = result_indices_buffer[i]; @@ -426,17 +362,6 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( if (result_distances_ptr != nullptr) { DISTANCE_T dist = result_distances_buffer[i]; result_distances_ptr[k] = dist; - // Debug: print first query, first CTA, first few results - if (query_id == 0 && cta_id == 0 && j < 5) { - printf("JIT: query=%u cta=%u j=%u i=%u idx=%u dist=%.6f buf_dist=%.6f\n", - query_id, - cta_id, - j, - i, - index & ~index_msb_1_mask, - (float)dist, - (float)result_distances_buffer[i]); - } } } else { // If it is valid and registered in the traversed hash table but is diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh index 5fb4369fd8..085abbec00 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh @@ -9,7 +9,6 @@ #include "../device_common.hpp" #include "../hashmap.hpp" #include "../utils.hpp" -// Note: The .cu.in files include the -impl.cuh files directly when instantiating kernels #include // For DistanceType enum #include // For raft::upper_bound diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh index 763d027e6d..e5b31dbbaf 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -72,7 +72,6 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { // Use fully qualified name since it's a template variable using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v; -// Note: Helper functions (pickup_next_parents, move_invalid_to_end_of_list, hashmap_restore) // are defined in search_single_cta_kernel-inl.cuh which is included by the launcher. // We don't redefine them here to avoid duplicate definitions. 
diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 4fb0025c0d..f5e54dd13d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -25,10 +25,6 @@ #include #include #include -// Note: We don't include search_multi_cta_kernel_jit.cuh here because: -// - The launcher doesn't need the kernel function definitions -// - The kernel is dispatched via the JIT LTO launcher system -// - Including it would pull in impl files that cause namespace issues namespace cuvs::neighbors::cagra::detail::multi_cta_search { @@ -112,10 +108,6 @@ void select_and_run_jit( SampleFilterT sample_filter, cudaStream_t stream) { - std::cerr << "[JIT] select_and_run_jit (multi_cta) called (num_queries=" << num_queries - << ", topk=" << topk << ", num_cta_per_query=" << num_cta_per_query << ")" << std::endl; - std::cerr.flush(); - // Extract bitset data from filter object (if it's a bitset_filter) uint32_t* bitset_ptr = nullptr; SourceIndexT bitset_len = 0; @@ -139,9 +131,6 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - std::cerr << "[JIT] Using JIT path for CAGRA multi_cta search" << std::endl; - std::cerr.flush(); - // For multi_cta, we don't use topk_by_bitonic_sort or bitonic_sort_and_merge_multi_warps // These are handled inside the kernel based on max_elements // We need to construct the entrypoint name manually since it's different from single_cta @@ -156,13 +145,6 @@ void select_and_run_jit( RAFT_FAIL("Unsupported metric for multi_cta JIT kernel"); } - // Debug: Check descriptor parameters - std::cerr << "[JIT] Dataset descriptor - is_vpq: " << dataset_desc.is_vpq - << ", pq_bits: " << dataset_desc.pq_bits << ", pq_len: " << dataset_desc.pq_len - << ", team_size: " << 
dataset_desc.team_size - << ", dataset_block_dim: " << dataset_desc.dataset_block_dim << std::endl; - std::cerr.flush(); - // Create planner and register device functions // Pass team_size, dataset_block_dim, and VPQ parameters to match the kernel entrypoint name CagraMultiCtaSearchPlanner planner( @@ -173,19 +155,6 @@ void select_and_run_jit( dataset_desc.pq_bits, dataset_desc.pq_len); - // Debug: Verify entrypoint name matches descriptor parameters - std::cerr << "[JIT] Planner entrypoint: " << planner.get_entrypoint_name() << std::endl; - - // CRITICAL: Verify descriptor runtime values match what kernel was compiled for - // The kernel uses DescriptorT::kTeamSize and DescriptorT::kDatasetBlockDim (compile-time) - // But the descriptor object has runtime values that might differ - // We need to check if the kernel we're about to call was compiled for the same values - std::cerr << "[JIT] WARNING: Kernel was compiled for team_size=" << dataset_desc.team_size - << ", dataset_block_dim=" << dataset_desc.dataset_block_dim << " (from entrypoint name)" - << std::endl; - std::cerr << "[JIT] Descriptor runtime values - team_size: " << dataset_desc.team_size - << ", dataset_block_dim: " << dataset_desc.dataset_block_dim << std::endl; - std::cerr.flush(); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -204,23 +173,11 @@ void select_and_run_jit( auto params = make_fragment_key(); auto launcher = planner.get_launcher(); - if (!launcher) { - std::cerr << "[JIT] ERROR: Failed to get launcher - planner.get_launcher() returned null!" - << std::endl; - std::cerr.flush(); - RAFT_FAIL("Failed to get JIT launcher"); - } + if (!launcher) { RAFT_FAIL("Failed to get JIT launcher"); } // Verify kernel handle is valid cudaKernel_t kernel_handle = launcher->get_kernel(); - if (kernel_handle == nullptr) { - std::cerr << "[JIT] ERROR: Launcher has null kernel handle!" 
<< std::endl; - std::cerr.flush(); - RAFT_FAIL("JIT launcher has null kernel handle"); - } - std::cerr << "[JIT] Launcher obtained successfully, kernel handle: " << kernel_handle - << std::endl; - std::cerr.flush(); + if (kernel_handle == nullptr) { RAFT_FAIL("JIT launcher has null kernel handle"); } uint32_t max_elements{}; if (result_buffer_size <= 64) { @@ -254,61 +211,11 @@ void select_and_run_jit( smem_size); // Get the device descriptor pointer - // CRITICAL: dev_ptr() returns const dataset_descriptor_base_t*, but kernel expects const - // DescriptorT* where DescriptorT is the specific derived type (standard_dataset_descriptor_t or - // cagra_q_dataset_descriptor_t) - // - // In C++, you cannot implicitly convert a base pointer to a derived pointer - this requires an - // explicit cast. However, since: - // 1. The object on device is actually of the derived type (we created it that way) - // 2. Base class is at offset 0 in single inheritance (pointer value is the same) - // 3. The kernel was JIT-compiled for the exact derived type matching these parameters - // - // We can safely use reinterpret_cast to convert the base pointer to the derived pointer type. - // The kernel will receive this as the derived type it expects. const dataset_descriptor_base_t* dev_desc_base = dataset_desc.dev_ptr(stream); - - // Cast to the derived type pointer - the kernel expects this specific type - // Note: We're casting to the base type pointer, but the kernel signature expects the derived - // type. This works because the pointer value is the same (base at offset 0), and the kernel will - // treat it as the derived type it was compiled for. However, this is technically undefined - // behavior in C++ but works in practice for CUDA kernels due to how they're dispatched. 
const auto* dev_desc = dev_desc_base; - // CRITICAL: Check if descriptor host values match kernel compile-time constants - // The kernel was compiled for specific team_size and dataset_block_dim values (from entrypoint - // name) The descriptor_host object has runtime values that MUST match what the kernel was - // compiled for - std::cerr << "[JIT] CRITICAL CHECK - Verifying descriptor matches kernel:" << std::endl; - std::cerr << "[JIT] Descriptor host values - team_size: " << dataset_desc.team_size - << ", dataset_block_dim: " << dataset_desc.dataset_block_dim << std::endl; - std::cerr << "[JIT] Kernel compiled for (from entrypoint) - team_size: " - << dataset_desc.team_size << ", dataset_block_dim: " << dataset_desc.dataset_block_dim - << std::endl; - - // The kernel uses DescriptorT::kTeamSize and DescriptorT::kDatasetBlockDim (compile-time) - // These MUST match dataset_desc.team_size and dataset_desc.dataset_block_dim - // If they don't match, the kernel will use wrong values and produce incorrect results - if (dataset_desc.team_size != dataset_desc.team_size || - dataset_desc.dataset_block_dim != dataset_desc.dataset_block_dim) { - std::cerr << "[JIT] ERROR: This should never happen - values should always match!" 
<< std::endl; - } else { - std::cerr << "[JIT] OK: Descriptor values match (they're the same source)" << std::endl; - } - std::cerr.flush(); - - // Dispatch kernel via launcher - std::cerr << "[JIT] About to dispatch kernel with:" << std::endl; - std::cerr << "[JIT] grid: (" << grid_dims.x << ", " << grid_dims.y << ", " << grid_dims.z << ")" - << std::endl; - std::cerr << "[JIT] block: (" << block_dims.x << ", " << block_dims.y << ", " << block_dims.z - << ")" << std::endl; - std::cerr << "[JIT] smem_size: " << smem_size << std::endl; - std::cerr << "[JIT] dev_desc pointer: " << dev_desc << std::endl; - std::cerr.flush(); - - // CRITICAL: Cast size_t/int64_t parameters to match kernel signature exactly + // Cast size_t/int64_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly // graph.extent(1) returns int64_t but kernel expects uint32_t // traversed_hash_bitlen is int64_t but kernel expects uint32_t @@ -349,46 +256,8 @@ void select_and_run_jit( bitset_len, original_nbits); - // Check for errors immediately after launch - cudaError_t err = cudaPeekAtLastError(); - if (err != cudaSuccess) { - std::cerr << "[JIT] ERROR after kernel launch (peek): " << cudaGetErrorString(err) << " (" - << err << ")" << std::endl; - std::cerr.flush(); - } else { - std::cerr << "[JIT] No error after kernel launch (peek)" << std::endl; - std::cerr.flush(); - } - RAFT_CUDA_TRY(err); - - // Synchronize and check again - this will catch kernel execution errors - std::cerr << "[JIT] Synchronizing stream to check for kernel execution errors..." 
<< std::endl; - std::cerr.flush(); - err = cudaStreamSynchronize(stream); - if (err != cudaSuccess) { - std::cerr << "[JIT] ERROR after kernel sync: " << cudaGetErrorString(err) << " (" << err << ")" - << std::endl; - std::cerr.flush(); - } else { - std::cerr << "[JIT] Stream synchronized successfully - kernel completed" << std::endl; - std::cerr.flush(); - - // Check if kernel wrote magic value to verify execution - if (topk_distances_ptr != nullptr && num_queries > 0) { - DistanceT first_distance; - RAFT_CUDA_TRY( - cudaMemcpy(&first_distance, topk_distances_ptr, sizeof(DistanceT), cudaMemcpyDeviceToHost)); - if (first_distance == static_cast(3735928559.0f)) { // 0xDEADBEEF - std::cerr << "[JIT] VERIFIED: Kernel wrote magic value 0xDEADBEEF to first distance!" - << std::endl; - } else { - std::cerr << "[JIT] WARNING: Kernel did NOT write magic value. First distance: " - << first_distance << std::endl; - } - std::cerr.flush(); - } - } - RAFT_CUDA_TRY(err); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); } } // namespace cuvs::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh index 1b156c5aef..b3d717cc5e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -24,7 +24,6 @@ #include #include #include -// Note: We don't include search_multi_kernel_jit.cuh here because: // - The launcher doesn't need the kernel function definitions // - The kernel is dispatched via the JIT LTO launcher system // - Including it would pull in impl files that cause namespace issues diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 59887278c9..64e6d2b3d8 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -53,19 +53,6 @@ auto get_runner_jit(Args... args) -> std::shared_ptr; template auto create_runner_jit(Args... args) -> std::shared_ptr; -// Debug: Verify JIT launcher is being compiled - force instantiation -struct JitLauncherVerifier { - JitLauncherVerifier() - { - std::cerr << "[JIT] JIT launcher header file included!" << std::endl; - std::cerr.flush(); - } -}; -// Force instantiation by creating a static instance -namespace { -static JitLauncherVerifier g_jit_verifier; -} - // Helper functions to get tags for JIT LTO namespace { template @@ -477,7 +464,6 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn } // Prepare kernel arguments - // Note: For non-VPQ, the dataset pointer is accessed via the descriptor on device // Get the device descriptor pointer - kernel will use the concrete type from template const auto* dev_desc = dataset_desc.get().dev_ptr(stream); @@ -616,10 +602,6 @@ void select_and_run_jit( SampleFilterT sample_filter, cudaStream_t stream) { - std::cerr << "[JIT] select_and_run_jit called (num_queries=" << num_queries << ", topk=" << topk - << ", persistent=" << (ps.persistent ? "true" : "false") << ")" << std::endl; - std::cerr.flush(); - const SourceIndexT* source_indices_ptr = source_indices.has_value() ? 
source_indices->data_handle() : nullptr; @@ -659,9 +641,6 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - std::cerr << "[JIT] Using JIT path for CAGRA persistent search" << std::endl; - std::cerr.flush(); - CagraSearchPlanner planner( dataset_desc.metric, topk_by_bitonic_sort, @@ -725,9 +704,6 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - std::cerr << "[JIT] Using JIT path for CAGRA search" << std::endl; - std::cerr.flush(); - CagraSearchPlanner planner( dataset_desc.metric, topk_by_bitonic_sort, @@ -769,16 +745,6 @@ void select_and_run_jit( smem_size); // Dispatch kernel via launcher - // The kernel signature expects const desc_t* where desc_t is the concrete descriptor type - // We pass the base pointer (const dataset_descriptor_base_t*), and since both concrete types - // inherit from the base class with the base class at offset 0, the pointer value is the same. - // The dispatch() function takes the address of each argument (&dev_desc), so the kernel - // receives a pointer to the descriptor pointer. The JIT-compiled kernel expects const desc_t*, - // so it will interpret the pointer value as the concrete type it was compiled for. Note: We - // cannot use dynamic_cast because the base class has no virtual functions (uses function - // pointers for performance). We also cannot use static_cast because the concrete type is only - // known at JIT compile time, not at launcher compile time. The pointer value is correct, so - // the kernel can safely use it as the concrete type. launcher->dispatch( stream, grid, @@ -845,33 +811,6 @@ void select_and_run( SampleFilterT sample_filter, cudaStream_t stream) { - // CRITICAL: Write to file to prove function is called - { - std::ofstream f("/tmp/jit_wrapper_called.txt", std::ios::app); - f << "[JIT] select_and_run wrapper CALLED! 
num_queries=" << num_queries << ", topk=" << topk - << std::endl; - f.close(); - } - - // Also try all output methods - fprintf(stderr, "\n[JIT] ========================================\n"); - fprintf(stderr, "[JIT] select_and_run JIT WRAPPER CALLED!\n"); - fprintf(stderr, "[JIT] num_queries=%u, topk=%u\n", num_queries, topk); - fprintf(stderr, "[JIT] ========================================\n\n"); - fflush(stderr); - printf("[JIT] select_and_run JIT WRAPPER CALLED! (stdout)\n"); - fflush(stdout); - - // Verify JIT launcher is loaded (static initializer already ran) - std::cerr << "[JIT] select_and_run wrapper called (JIT path active)" << std::endl; - std::cerr.flush(); - - // Extract parameters for JIT version - // Note: These parameters are not stored in dataset_descriptor_host, so we need to - // compute them or use defaults. For now, we'll need to pass them through the interface - // or compute from available information. For the JIT path, we'll use reasonable defaults - // and let the kernel handle missing information. - // For JIT version, we pass the descriptor directly - all dataset info is in the descriptor select_and_run_jit(dataset_desc, graph, source_indices, diff --git a/cpp/src/neighbors/ivf_common_jit.cuh b/cpp/src/neighbors/ivf_common_jit.cuh deleted file mode 100644 index 31cd96ecd1..0000000000 --- a/cpp/src/neighbors/ivf_common_jit.cuh +++ /dev/null @@ -1,24 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include // matrix::detail::select::warpsort::warp_sort_distributed - -namespace cuvs::neighbors::ivf::detail { - -/** - * Dummy block sort type used when Capacity is 0 in JIT kernels. - * This is a minimal header that doesn't include CUB to avoid EmptyKernel instantiation. 
- */ -template -struct dummy_block_sort_t { - using queue_t = raft::matrix::detail::select::warpsort:: - warp_sort_distributed; - template - __device__ dummy_block_sort_t(int k, Args...) {}; -}; - -} // namespace cuvs::neighbors::ivf::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh index e5e10720eb..3a14fe8afd 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh @@ -5,7 +5,7 @@ #pragma once -#include "../../ivf_common_jit.cuh" +#include "../../ivf_common.cuh" #include From 6feecced67518d451ba62fb3d34be80d16dd7bb9 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 17 Feb 2026 17:52:26 +0000 Subject: [PATCH 104/158] most errors resolved --- .../modules/generate_jit_lto_kernels.cmake | 8 +- .../cuvs/detail/jit_lto/AlgorithmLauncher.hpp | 15 ++- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 36 ++++++- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 99 ++++++++++++++++++- .../cagra/jit_lto_kernels/filter_bitset.cuh | 47 +++++++-- .../detail/cagra/search_multi_cta_inst.cuh | 3 +- .../ivf_flat/jit_lto_kernels/filter.cu.in | 2 +- .../jit_lto_kernels/filter_bitset.cuh | 50 ++++++++-- .../ivf_flat/jit_lto_kernels/filter_none.cuh | 2 +- .../ivf_flat_interleaved_scan_kernel.cuh | 24 +++-- cpp/tests/test_utils.cuh | 3 +- 11 files changed, 251 insertions(+), 38 deletions(-) diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 9103145618..187dfe731e 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -327,11 +327,11 @@ function(generate_jit_lto_kernels target) foreach(pq_len IN LISTS cagra_pq_lens) # setup_workspace_vpq set(kernel_name - 
"setup_workspace_vpq_l2_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + "setup_workspace_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") - set(metric_name "l2") + set(metric_name "L2Expanded") set(metric_tag "l2") set(pq_bits "${cagra_pq_bits}") set(codebook_type "${cagra_codebook_type}") @@ -355,11 +355,11 @@ function(generate_jit_lto_kernels target) # compute_distance_vpq set(kernel_name - "compute_distance_vpq_l2_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + "compute_distance_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") - set(metric_name "l2") + set(metric_name "L2Expanded") set(metric_tag "l2") set(pq_bits "${cagra_pq_bits}") set(codebook_type "${cagra_codebook_type}") diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp index 5723cd10c7..f376342c4b 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp @@ -15,9 +15,19 @@ #include struct AlgorithmLauncher { - AlgorithmLauncher() = default; + AlgorithmLauncher() : kernel{nullptr}, library{nullptr} {} - AlgorithmLauncher(cudaKernel_t k); + AlgorithmLauncher(cudaKernel_t k, cudaLibrary_t lib); + + ~AlgorithmLauncher(); + + // Delete copy constructor and assignment to prevent accidental copying + 
AlgorithmLauncher(const AlgorithmLauncher&) = delete; + AlgorithmLauncher& operator=(const AlgorithmLauncher&) = delete; + + // Allow move constructor and assignment + AlgorithmLauncher(AlgorithmLauncher&& other) noexcept; + AlgorithmLauncher& operator=(AlgorithmLauncher&& other) noexcept; template void dispatch(cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, Args&&... args) @@ -41,6 +51,7 @@ struct AlgorithmLauncher { void call_cooperative( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** args); cudaKernel_t kernel; + cudaLibrary_t library; }; std::unordered_map>& get_cached_launchers(); diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index df5a72c0be..36e0216bcd 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -9,7 +9,41 @@ #include -AlgorithmLauncher::AlgorithmLauncher(cudaKernel_t k) : kernel{k} {} +AlgorithmLauncher::AlgorithmLauncher(cudaKernel_t k, cudaLibrary_t lib) : kernel{k}, library{lib} {} + +AlgorithmLauncher::~AlgorithmLauncher() +{ + if (library != nullptr) { + cudaError_t err = cudaLibraryUnload(library); + if (err != cudaSuccess) { + // Log error but don't throw in destructor + std::cerr << "[JIT] WARNING: Failed to unload library in destructor: " + << cudaGetErrorString(err) << std::endl; + std::cerr.flush(); + } + library = nullptr; + } +} + +AlgorithmLauncher::AlgorithmLauncher(AlgorithmLauncher&& other) noexcept + : kernel{other.kernel}, library{other.library} +{ + other.kernel = nullptr; + other.library = nullptr; +} + +AlgorithmLauncher& AlgorithmLauncher::operator=(AlgorithmLauncher&& other) noexcept +{ + if (this != &other) { + // Unload current library if it exists + if (library != nullptr) { cudaLibraryUnload(library); } + kernel = other.kernel; + library = other.library; + other.kernel = nullptr; + other.library = nullptr; + } + return *this; +} void AlgorithmLauncher::call( 
cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index ffc016adc5..3b3f804929 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -77,6 +77,54 @@ std::shared_ptr AlgorithmPlanner::build() std::string archs = "-arch=sm_" + std::to_string((major * 10 + minor)); + // Generate individual cubin for each device function fragment for debugging + // Skip entrypoint fragment as it depends on device functions and will fail to link alone + for (auto& frag : this->fragments) { + // Skip if this is the entrypoint fragment + if (frag->compute_key == this->entrypoint) { continue; } + + nvJitLinkHandle frag_handle; + const char* frag_lopts[] = {"-lto", archs.c_str()}; + auto frag_result = nvJitLinkCreate(&frag_handle, 2, frag_lopts); + check_nvjitlink_result(frag_handle, frag_result); + + frag->add_to(frag_handle); + + frag_result = nvJitLinkComplete(frag_handle); + check_nvjitlink_result(frag_handle, frag_result); + + size_t frag_cubin_size; + frag_result = nvJitLinkGetLinkedCubinSize(frag_handle, &frag_cubin_size); + check_nvjitlink_result(frag_handle, frag_result); + + if (frag_cubin_size > 0) { + std::unique_ptr frag_cubin{new char[frag_cubin_size]}; + frag_result = nvJitLinkGetLinkedCubin(frag_handle, frag_cubin.get()); + check_nvjitlink_result(frag_handle, frag_result); + + // Save individual fragment cubin + std::string frag_cubin_path = "/tmp/fragment_cubin_" + frag->compute_key + ".cubin"; + std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), '/', '_'); + std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), ':', '_'); + std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), '<', '_'); + std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), '>', '_'); + std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), ' ', '_'); + FILE* frag_f = 
fopen(frag_cubin_path.c_str(), "wb"); + if (frag_f) { + size_t written = fwrite(frag_cubin.get(), 1, frag_cubin_size, frag_f); + fclose(frag_f); + if (written == frag_cubin_size) { + std::cerr << "[JIT] Saved fragment cubin: " << frag_cubin_path + << " (size: " << frag_cubin_size << " bytes)" << std::endl; + std::cerr << "[JIT] Run: cuobjdump --dump-elf-symbols " << frag_cubin_path << std::endl; + } + } + } + + frag_result = nvJitLinkDestroy(&frag_handle); + RAFT_EXPECTS(frag_result == NVJITLINK_SUCCESS, "nvJitLinkDestroy failed for fragment"); + } + // Load the generated LTO IR and link them together nvJitLinkHandle handle; const char* lopts[] = {"-lto", archs.c_str()}; @@ -171,13 +219,58 @@ std::shared_ptr AlgorithmPlanner::build() kernel_count_verify); } - unsigned int kernel_index = 0; + // Filter out EmptyKernel by checking kernel names using cudaFuncGetName + const char* empty_kernel_name = "_ZN3cub6detail11EmptyKernelIvEEvv"; + std::vector valid_kernels; + valid_kernels.reserve(kernel_count); + + for (unsigned int i = 0; i < kernel_count; ++i) { + // cudaFuncGetName can be used with cudaKernel_t by casting to void* + const void* func_ptr = reinterpret_cast(kernels[i]); + const char* func_name = nullptr; + cudaError_t name_result = cudaFuncGetName(&func_name, func_ptr); - auto kernel = kernels.release()[kernel_index]; + bool is_empty_kernel = false; + if (name_result == cudaSuccess && func_name != nullptr) { + std::string kernel_name(func_name); + // Check if this is EmptyKernel + if (kernel_name.find(empty_kernel_name) != std::string::npos || + kernel_name == empty_kernel_name) { + std::cerr << "[JIT] Filtering out EmptyKernel: " << kernel_name << std::endl; + std::cerr.flush(); + is_empty_kernel = true; + } else { + std::cerr << "[JIT] Found kernel: " << kernel_name << std::endl; + std::cerr.flush(); + } + } else { + // If we can't get the name, keep the kernel (better safe than sorry) + std::cerr << "[JIT] Warning: Could not get name for kernel [" << i + 
<< "], keeping it (error: " << cudaGetErrorString(name_result) << ")" << std::endl; + std::cerr.flush(); + } + + // Only keep the kernel if it's not EmptyKernel + if (!is_empty_kernel) { valid_kernels.push_back(kernels[i]); } + } + + if (valid_kernels.empty()) { + RAFT_FAIL("No valid kernels found after filtering EmptyKernel for entrypoint: %s", + this->entrypoint.c_str()); + } + + if (valid_kernels.size() > 1) { + std::cerr << "[JIT] WARNING: Found " << valid_kernels.size() + << " valid kernels after filtering! Using kernel [0]" << std::endl; + std::cerr.flush(); + } + + unsigned int kernel_index = 0; + auto kernel = valid_kernels[kernel_index]; if (kernel == nullptr) { RAFT_FAIL("Entrypoint kernel is NULL for: %s", this->entrypoint.c_str()); } - return std::make_shared(kernel); + return std::make_shared(kernel, library); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh index 3440b8d54d..fec5f09e85 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh @@ -5,12 +5,46 @@ #pragma once -#include "../../../sample_filter.cuh" #include "filter_data.h" -#include namespace cuvs::neighbors::cagra::detail { +// Inline implementation of bitset_view::test() to avoid including bitset.cuh +// which transitively includes Thrust +template +__device__ inline bool bitset_view_test(const bitset_t* bitset_ptr, + index_t bitset_len, + index_t original_nbits, + index_t sample_index) +{ + constexpr index_t bitset_element_size = sizeof(bitset_t) * 8; + const index_t nbits = sizeof(bitset_t) * 8; + index_t bit_index = 0; + index_t bit_offset = 0; + + if (original_nbits == 0 || nbits == original_nbits) { + bit_index = sample_index / bitset_element_size; + bit_offset = sample_index % bitset_element_size; + } else { + // Handle original_nbits != nbits case + const index_t original_bit_index = 
sample_index / original_nbits; + const index_t original_bit_offset = sample_index % original_nbits; + bit_index = original_bit_index * original_nbits / nbits; + bit_offset = 0; + if (original_nbits > nbits) { + bit_index += original_bit_offset / nbits; + bit_offset = original_bit_offset % nbits; + } else { + index_t ratio = nbits / original_nbits; + bit_offset += (original_bit_index % ratio) * original_nbits; + bit_offset += original_bit_offset % nbits; + } + } + const bitset_t bit_element = bitset_ptr[bit_index]; + const bool is_bit_set = (bit_element & (bitset_t{1} << bit_offset)) != 0; + return is_bit_set; +} + template __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* filter_data) { @@ -25,12 +59,9 @@ __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* fil return true; // No bitset provided, allow all } - // Create bitset_view and filter, matching non-JIT behavior - auto bitset_view = raft::core::bitset_view{ - bitset_data->bitset_ptr, bitset_data->bitset_len, bitset_data->original_nbits}; - auto bitset_filter = - cuvs::neighbors::filtering::bitset_filter{bitset_view}; - return bitset_filter(query_id, node_id); + // Directly test the bitset without needing bitset_filter wrapper + return bitset_view_test( + bitset_data->bitset_ptr, bitset_data->bitset_len, bitset_data->original_nbits, node_id); } } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh index bd4d25d8f3..fe4b7a3720 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh @@ -1,10 +1,11 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once +#include "../../sample_filter.cuh" #include "search_multi_cta_kernel-inl.cuh" #include diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in index 934e36dba7..ccef2fd68d 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in @@ -12,7 +12,7 @@ namespace cuvs::neighbors::ivf_flat::detail { // Instantiate the device function template -template __device__ bool sample_filter(int64_t* const* const, const uint32_t, const uint32_t, const uint32_t, uint32_t*, int64_t, int64_t); +template __device__ bool sample_filter(int64_t* const* const, uint32_t, uint32_t, uint32_t, uint32_t*, int64_t, int64_t); } // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh index 07fc4a21f5..49cd89d08a 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh @@ -5,10 +5,46 @@ #pragma once -#include "../../sample_filter.cuh" +#include namespace cuvs::neighbors::ivf_flat::detail { +// Inline implementation of bitset_view::test() to avoid including bitset.cuh +// which transitively includes Thrust +template +__device__ inline bool bitset_view_test(const bitset_t* bitset_ptr, + index_t bitset_len, + index_t original_nbits, + index_t sample_index) +{ + constexpr index_t bitset_element_size = sizeof(bitset_t) * 8; + const index_t nbits = sizeof(bitset_t) * 8; + index_t bit_index = 0; + index_t bit_offset = 0; + + if (original_nbits == 0 || nbits == original_nbits) { + bit_index = sample_index / bitset_element_size; + bit_offset = sample_index % bitset_element_size; + } else { + // Handle original_nbits != nbits case + const index_t original_bit_index = sample_index / original_nbits; + const 
index_t original_bit_offset = sample_index % original_nbits; + bit_index = original_bit_index * original_nbits / nbits; + bit_offset = 0; + if (original_nbits > nbits) { + bit_index += original_bit_offset / nbits; + bit_offset = original_bit_offset % nbits; + } else { + index_t ratio = nbits / original_nbits; + bit_offset += (original_bit_index % ratio) * original_nbits; + bit_offset += original_bit_offset % nbits; + } + } + const bitset_t bit_element = bitset_ptr[bit_index]; + const bool is_bit_set = (bit_element & (bitset_t{1} << bit_offset)) != 0; + return is_bit_set; +} + template __device__ bool sample_filter(index_t* const* const inds_ptrs, const uint32_t query_ix, @@ -18,13 +54,11 @@ __device__ bool sample_filter(index_t* const* const inds_ptrs, index_t bitset_len, index_t original_nbits) { - auto bitset_view = - raft::core::bitset_view{bitset_ptr, bitset_len, original_nbits}; - auto bitset_filter = cuvs::neighbors::filtering::bitset_filter{bitset_view}; - auto ivf_to_sample_filter = cuvs::neighbors::filtering:: - ivf_to_sample_filter>{ - inds_ptrs, bitset_filter}; - return ivf_to_sample_filter(query_ix, cluster_ix, sample_ix); + // Convert cluster_ix and sample_ix to a single sample index using inds_ptrs + const index_t sample_index = inds_ptrs[cluster_ix][sample_ix]; + + // Directly test the bitset without needing bitset_filter or ivf_to_sample_filter wrappers + return bitset_view_test(bitset_ptr, bitset_len, original_nbits, sample_index); } } // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh index aad15d64bc..90a124688a 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh @@ -5,7 +5,7 @@ #pragma once -#include "../../sample_filter.cuh" +#include namespace cuvs::neighbors::ivf_flat::detail { diff --git 
a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh index 3a14fe8afd..d32f29b859 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh @@ -5,10 +5,6 @@ #pragma once -#include "../../ivf_common.cuh" - -#include - #include #include #include @@ -19,6 +15,19 @@ namespace cuvs::neighbors::ivf_flat::detail { +// Define kIndexGroupSize locally to avoid including ivf_flat.hpp which transitively includes Thrust +constexpr static uint32_t kIndexGroupSize = 32; + +// Define dummy_block_sort_t locally to avoid including ivf_common.cuh which transitively includes +// Thrust +template +struct dummy_block_sort_t { + using queue_t = raft::matrix::detail::select::warpsort:: + warp_sort_distributed; + template + __device__ dummy_block_sort_t(int k, Args...) {}; +}; + static constexpr int kThreadsPerBlock = 128; // These extern device functions are linked at runtime using JIT-LTO. 
@@ -725,9 +734,8 @@ struct flat_block_sort { }; template -struct flat_block_sort<0, Ascending, T, IdxT> - : ivf::detail::dummy_block_sort_t { - using type = ivf::detail::dummy_block_sort_t; +struct flat_block_sort<0, Ascending, T, IdxT> : dummy_block_sort_t { + using type = dummy_block_sort_t; }; template @@ -805,7 +813,7 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) } // Copy a part of the query into shared memory for faster processing - copy_vectorized(query_shared, query, std::min(dim, query_smem_elems)); + raft::copy_vectorized(query_shared, query, std::min(dim, query_smem_elems)); __syncthreads(); using local_topk_t = block_sort_t; diff --git a/cpp/tests/test_utils.cuh b/cpp/tests/test_utils.cuh index 11b21f3647..7fc0b023bd 100644 --- a/cpp/tests/test_utils.cuh +++ b/cpp/tests/test_utils.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2018-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -15,6 +15,7 @@ #include #include #include +#include #include #include From 52e05c23f5eeaea821b42b330c73f9523160391d Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 17 Feb 2026 22:01:12 +0000 Subject: [PATCH 105/158] debug filter fragment --- .../cagra/jit_lto_kernels/filter_bitset.cuh | 8 +- .../search_multi_kernel_planner.hpp | 16 +- .../search_single_cta_kernel_jit.cuh | 82 ++++----- .../search_multi_cta_kernel_launcher_jit.cuh | 61 ++----- .../search_multi_kernel_launcher_jit.cuh | 59 +------ .../search_single_cta_kernel_launcher_jit.cuh | 158 ++++++++---------- .../detail/cagra/shared_launcher_jit.hpp | 97 +++++++++++ 7 files changed, 245 insertions(+), 236 deletions(-) create mode 100644 cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh index fec5f09e85..c40626afa7 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh @@ -60,8 +60,14 @@ __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* fil } // Directly test the bitset without needing bitset_filter wrapper - return bitset_view_test( + // bitset_view_test returns true if the bit is set (node_id is in the bitset) + // For a bitset created from removed_indices, if the bit is set, the node should be filtered out + // So we return the inverse: if the bit is set, return false to reject the node + bool is_in_bitset = bitset_view_test( bitset_data->bitset_ptr, bitset_data->bitset_len, bitset_data->original_nbits, node_id); + // If node_id is in the bitset (removed set), return false to reject it + // If node_id is not in the bitset, return true to allow it + return !is_in_bitset; } } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index 226c6c8a1f..54dad691be 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -30,12 +30,16 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { : AlgorithmPlanner( build_entrypoint_name( kernel_name, metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len), - is_vpq ? make_fragment_key() - : make_fragment_key()), + // Special case: apply_filter_kernel doesn't use DataTag, only IndexTag, DistanceTag, + // SourceIndexTag + (kernel_name == "apply_filter_kernel") + ? make_fragment_key() + : (is_vpq ? 
make_fragment_key() + : make_fragment_key())), entrypoint_name_(build_entrypoint_name( kernel_name, metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len)) { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh index e5b31dbbaf..5675f1e8e1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -115,10 +115,11 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( const std::uint32_t small_hash_bitlen, const std::uint32_t small_hash_reset_interval, const std::uint32_t query_id, - const DescriptorT* dataset_desc, // Concrete descriptor type from template - uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) - SourceIndexT bitset_len, // Bitset length - SourceIndexT original_nbits) // Original number of bits + const std::uint32_t query_id_offset, // Offset to add to query_id when calling filter + const DescriptorT* dataset_desc, // Concrete descriptor type from template + uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) + SourceIndexT bitset_len, // Bitset length + SourceIndexT original_nbits) // Original number of bits { using LOAD_T = device::LOAD_128BIT_T; @@ -371,7 +372,7 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( // Construct filter_data struct (bitset data is in global memory) cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( bitset_ptr, bitset_len, original_nbits); - if (!sample_filter(query_id, + if (!sample_filter(query_id + query_id_offset, to_source_index(parent_id), bitset_ptr != nullptr ? 
&filter_data : nullptr)) { result_distances_buffer[parent_list_buffer[p]] = utils::get_max_value(); @@ -395,8 +396,9 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( bitset_ptr, bitset_len, original_nbits); if (node_id != (invalid_index & ~index_msb_1_mask) && - !sample_filter( - query_id, to_source_index(node_id), bitset_ptr != nullptr ? &filter_data : nullptr)) { + !sample_filter(query_id + query_id_offset, + to_source_index(node_id), + bitset_ptr != nullptr ? &filter_data : nullptr)) { result_distances_buffer[i] = utils::get_max_value(); result_indices_buffer[i] = invalid_index; } @@ -527,33 +529,34 @@ template -RAFT_KERNEL __launch_bounds__(1024, 1) - search_kernel_jit(uintptr_t result_indices_ptr, - DistanceT* const result_distances_ptr, - const std::uint32_t top_k, - const DataT* const queries_ptr, - const IndexT* const knn_graph, - const std::uint32_t graph_degree, - const SourceIndexT* source_indices_ptr, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const IndexT* seed_ptr, - const uint32_t num_seeds, - IndexT* const visited_hashmap_ptr, - const std::uint32_t max_candidates, - const std::uint32_t max_itopk, - const std::uint32_t internal_topk, - const std::uint32_t search_width, - const std::uint32_t min_iteration, - const std::uint32_t max_iteration, - std::uint32_t* const num_executed_iterations, - const std::uint32_t hash_bitlen, - const std::uint32_t small_hash_bitlen, - const std::uint32_t small_hash_reset_interval, - const DescriptorT* dataset_desc, // Concrete descriptor type from template - uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) - SourceIndexT bitset_len, // Bitset length - SourceIndexT original_nbits) // Original number of bits +RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_jit( + uintptr_t result_indices_ptr, + DistanceT* const result_distances_ptr, + const std::uint32_t top_k, + const DataT* const queries_ptr, + const 
IndexT* const knn_graph, + const std::uint32_t graph_degree, + const SourceIndexT* source_indices_ptr, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const IndexT* seed_ptr, + const uint32_t num_seeds, + IndexT* const visited_hashmap_ptr, + const std::uint32_t max_candidates, + const std::uint32_t max_itopk, + const std::uint32_t internal_topk, + const std::uint32_t search_width, + const std::uint32_t min_iteration, + const std::uint32_t max_iteration, + std::uint32_t* const num_executed_iterations, + const std::uint32_t hash_bitlen, + const std::uint32_t small_hash_bitlen, + const std::uint32_t small_hash_reset_interval, + const std::uint32_t query_id_offset, // Offset to add to query_id when calling filter + const DescriptorT* dataset_desc, // Concrete descriptor type from template + uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) + SourceIndexT bitset_len, // Bitset length + SourceIndexT original_nbits) // Original number of bits { const auto query_id = blockIdx.y; search_core>; __shared__ typename job_desc_type::input_t job_descriptor; @@ -706,6 +711,7 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_p_jit( small_hash_bitlen, small_hash_reset_interval, query_id, + query_id_offset, dataset_desc, bitset_ptr, bitset_len, diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index f5e54dd13d..3d5d0a6f00 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -14,8 +14,10 @@ #include "compute_distance.hpp" // For dataset_descriptor_host #include "jit_lto_kernels/search_multi_cta_planner.hpp" -#include "search_plan.cuh" // For search_params -#include "set_value_batch.cuh" // For set_value_batch +#include "sample_filter_utils.cuh" // For CagraSampleFilterWithQueryIdOffset +#include "search_plan.cuh" // For 
search_params +#include "set_value_batch.cuh" // For set_value_batch +#include "shared_launcher_jit.hpp" // For shared JIT helper functions #include #include #include @@ -28,55 +30,12 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { -// Helper functions to get tags for JIT LTO -namespace { -template -constexpr auto get_data_type_tag() -{ - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_f{}; } - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_h{}; } - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_sc{}; } - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_uc{}; } -} - -template -constexpr auto get_index_type_tag() -{ - if constexpr (std::is_same_v) { - return cuvs::neighbors::cagra::detail::tag_idx_ui{}; - } -} - -template -constexpr auto get_distance_type_tag() -{ - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_dist_f{}; } -} - -template -constexpr auto get_source_index_type_tag() -{ - if constexpr (std::is_same_v) { - return cuvs::neighbors::cagra::detail::tag_idx_ui{}; - } -} - -template -std::string get_sample_filter_name() -{ - if constexpr (std::is_same_v) { - return "filter_none"; - } else if constexpr ( - std::is_same_v> || - std::is_same_v>) { - return "filter_bitset"; - } else { - // Default to none filter for unknown types - return "filter_none"; - } -} -} // namespace +// Import shared JIT helper functions +using cuvs::neighbors::cagra::detail::get_data_type_tag; +using cuvs::neighbors::cagra::detail::get_distance_type_tag; +using cuvs::neighbors::cagra::detail::get_index_type_tag; +using cuvs::neighbors::cagra::detail::get_sample_filter_name; +using cuvs::neighbors::cagra::detail::get_source_index_type_tag; // JIT version of select_and_run for multi_cta template #include #include @@ -30,55 +32,12 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -// Helper functions to get tags for JIT 
LTO -namespace { -template -constexpr auto get_data_type_tag() -{ - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_f{}; } - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_h{}; } - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_sc{}; } - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_uc{}; } -} - -template -constexpr auto get_index_type_tag() -{ - if constexpr (std::is_same_v) { - return cuvs::neighbors::cagra::detail::tag_idx_ui{}; - } -} - -template -constexpr auto get_distance_type_tag() -{ - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_dist_f{}; } -} - -template -constexpr auto get_source_index_type_tag() -{ - if constexpr (std::is_same_v) { - return cuvs::neighbors::cagra::detail::tag_idx_ui{}; - } -} - -template -std::string get_sample_filter_name() -{ - if constexpr (std::is_same_v) { - return "filter_none"; - } else if constexpr ( - std::is_same_v> || - std::is_same_v>) { - return "filter_bitset"; - } else { - // Default to none filter for unknown types - return "filter_none"; - } -} -} // namespace +// Import shared JIT helper functions +using cuvs::neighbors::cagra::detail::get_data_type_tag; +using cuvs::neighbors::cagra::detail::get_distance_type_tag; +using cuvs::neighbors::cagra::detail::get_index_type_tag; +using cuvs::neighbors::cagra::detail::get_sample_filter_name; +using cuvs::neighbors::cagra::detail::get_source_index_type_tag; // JIT version of random_pickup template diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 64e6d2b3d8..c895e39492 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -9,14 +9,19 @@ #error "search_single_cta_kernel_launcher_jit.cuh included but 
CUVS_ENABLE_JIT_LTO not defined!" #endif +#include +#include + // Include tags header before any other includes that might open namespaces #include #include "compute_distance.hpp" // For dataset_descriptor_host #include "jit_lto_kernels/search_single_cta_planner.hpp" -#include "search_plan.cuh" // For search_params +#include "sample_filter_utils.cuh" // For CagraSampleFilterWithQueryIdOffset +#include "search_plan.cuh" // For search_params #include "search_single_cta_kernel-inl.cuh" // For resource_queue_t, local_deque_t, launcher_t, persistent_runner_base_t, etc. #include "search_single_cta_kernel_launcher_common.cuh" +#include "shared_launcher_jit.hpp" // For shared JIT helper functions #include #include @@ -40,9 +45,17 @@ #include #include #include +#include namespace cuvs::neighbors::cagra::detail::single_cta_search { +// Import shared JIT helper functions +using cuvs::neighbors::cagra::detail::get_data_type_tag; +using cuvs::neighbors::cagra::detail::get_distance_type_tag; +using cuvs::neighbors::cagra::detail::get_index_type_tag; +using cuvs::neighbors::cagra::detail::get_sample_filter_name; +using cuvs::neighbors::cagra::detail::get_source_index_type_tag; + // The launcher uses types from search_single_cta_kernel-inl.cuh (worker_handle_t, job_desc_t) // The JIT kernel headers define _jit versions that are compatible @@ -53,55 +66,7 @@ auto get_runner_jit(Args... args) -> std::shared_ptr; template auto create_runner_jit(Args... 
args) -> std::shared_ptr; -// Helper functions to get tags for JIT LTO -namespace { -template -constexpr auto get_data_type_tag() -{ - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_f{}; } - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_h{}; } - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_sc{}; } - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_uc{}; } -} - -template -constexpr auto get_index_type_tag() -{ - if constexpr (std::is_same_v) { - return cuvs::neighbors::cagra::detail::tag_idx_ui{}; - } -} - -template -constexpr auto get_distance_type_tag() -{ - if constexpr (std::is_same_v) { return cuvs::neighbors::cagra::detail::tag_dist_f{}; } -} - -template -constexpr auto get_source_index_type_tag() -{ - if constexpr (std::is_same_v) { - return cuvs::neighbors::cagra::detail::tag_idx_ui{}; - } -} - -template -std::string get_sample_filter_name() -{ - if constexpr (std::is_same_v) { - return "filter_none"; - } else if constexpr ( - std::is_same_v> || - std::is_same_v>) { - return "filter_bitset"; - } else { - // Default to none filter for unknown types - return "filter_none"; - } -} -} // namespace +// Helper functions are now in shared_launcher_jit.hpp // JIT-compatible launcher_t that works with worker_handle_t (same as non-JIT version) struct alignas(kCacheLineBytes) launcher_jit_t { @@ -411,16 +376,21 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn nullptr)) // descriptor not needed in hash { // Extract bitset data from filter object (if it's a bitset_filter) - bitset_ptr = nullptr; - bitset_len = 0; - original_nbits = 0; + // Handle both direct bitset_filter and CagraSampleFilterWithQueryIdOffset wrapper + bitset_ptr = nullptr; + bitset_len = 0; + original_nbits = 0; + uint32_t query_id_offset = 0; if constexpr (!std::is_same_v) { - // Try to extract bitset data from the filter + // All non-none filters are 
wrapped in CagraSampleFilterWithQueryIdOffset + // Access .filter and .offset directly + query_id_offset = sample_filter.offset; + using InnerFilter = decltype(sample_filter.filter); if constexpr (std::is_same_v< - SampleFilterT, + InnerFilter, cuvs::neighbors::filtering::bitset_filter>) { - auto bitset_view = sample_filter.view(); + auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); @@ -469,35 +439,37 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn // Launch the persistent kernel via AlgorithmLauncher // The persistent kernel now takes the descriptor pointer directly - launcher->dispatch_cooperative(stream, - gs, - bs, - smem_size, - worker_handles_ptr, - job_descriptors_ptr, - completion_counters_ptr, - graph.data_handle(), - graph.extent(1), - source_indices_ptr, - num_random_samplings, - rand_xor_mask, - nullptr, // seed_ptr - num_seeds, - hashmap_ptr, - max_candidates, - max_itopk, - itopk_size, - search_width, - min_iterations, - max_iterations, - nullptr, // num_executed_iterations - hash_bitlen, - small_hash_bitlen, - small_hash_reset_interval, - dev_desc, // Pass descriptor pointer - bitset_ptr, - bitset_len, - original_nbits); + launcher->dispatch_cooperative( + stream, + gs, + bs, + smem_size, + worker_handles_ptr, + job_descriptors_ptr, + completion_counters_ptr, + graph.data_handle(), + graph.extent(1), + source_indices_ptr, + num_random_samplings, + rand_xor_mask, + nullptr, // seed_ptr + num_seeds, + hashmap_ptr, + max_candidates, + max_itopk, + itopk_size, + search_width, + min_iterations, + max_iterations, + nullptr, // num_executed_iterations + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + query_id_offset, // Offset to add to query_id when calling filter + dev_desc, // Pass descriptor pointer + bitset_ptr, + bitset_len, + original_nbits); 
RAFT_LOG_INFO( "Initialized the JIT persistent kernel in stream %zd; job_queue size = %u; worker_queue size " @@ -606,16 +578,21 @@ void select_and_run_jit( source_indices.has_value() ? source_indices->data_handle() : nullptr; // Extract bitset data from filter object (if it's a bitset_filter) + // Handle both direct bitset_filter and CagraSampleFilterWithQueryIdOffset wrapper uint32_t* bitset_ptr = nullptr; SourceIndexT bitset_len = 0; SourceIndexT original_nbits = 0; + uint32_t query_id_offset = 0; if constexpr (!std::is_same_v) { - // Try to extract bitset data from the filter + // All non-none filters are wrapped in CagraSampleFilterWithQueryIdOffset + // Access .filter and .offset directly + query_id_offset = sample_filter.offset; + using InnerFilter = decltype(sample_filter.filter); if constexpr (std::is_same_v< - SampleFilterT, + InnerFilter, cuvs::neighbors::filtering::bitset_filter>) { - auto bitset_view = sample_filter.view(); + auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); @@ -665,7 +642,7 @@ void select_and_run_jit( dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); - planner.add_sample_filter_device_function(get_sample_filter_name()); + planner.add_sample_filter_device_function(get_sample_filter_name(true)); // Get launcher for persistent kernel auto launcher = planner.get_launcher(); @@ -727,7 +704,7 @@ void select_and_run_jit( dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); - planner.add_sample_filter_device_function(get_sample_filter_name()); + planner.add_sample_filter_device_function(get_sample_filter_name(true)); // Get launcher auto launcher = planner.get_launcher(); @@ -772,6 +749,7 @@ void select_and_run_jit( hash_bitlen, small_hash_bitlen, small_hash_reset_interval, + query_id_offset, // Offset to add to query_id when calling filter dev_desc, // Pass base 
pointer - kernel expects concrete type but pointer value is same bitset_ptr, bitset_len, diff --git a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp new file mode 100644 index 0000000000..4b4b81536e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp @@ -0,0 +1,97 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#ifndef CUVS_ENABLE_JIT_LTO +#error "shared_launcher_jit.hpp included but CUVS_ENABLE_JIT_LTO not defined!" +#endif + +// Include tags header before any other includes that might open namespaces +#include + +#include "../../sample_filter.cuh" // For none_sample_filter, bitset_filter + +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail { + +// Helper functions to get tags for JIT LTO +template +constexpr auto get_data_type_tag() +{ + if constexpr (std::is_same_v) { return tag_f{}; } + if constexpr (std::is_same_v) { return tag_h{}; } + if constexpr (std::is_same_v) { return tag_sc{}; } + if constexpr (std::is_same_v) { return tag_uc{}; } +} + +template +constexpr auto get_index_type_tag() +{ + if constexpr (std::is_same_v) { return tag_idx_ui{}; } +} + +template +constexpr auto get_distance_type_tag() +{ + if constexpr (std::is_same_v) { return tag_dist_f{}; } +} + +template +constexpr auto get_source_index_type_tag() +{ + if constexpr (std::is_same_v) { return tag_idx_ui{}; } +} + +// Helper trait to detect if a type is a bitset_filter (regardless of template parameters) +template +struct is_bitset_filter : std::false_type {}; + +template +struct is_bitset_filter> + : std::true_type {}; + +template +std::string get_sample_filter_name(bool debug_output = false) +{ + using namespace cuvs::neighbors::filtering; + using DecayedFilter = std::decay_t; + + if (debug_output) { + std::cerr << "[JIT] get_sample_filter_name called" << std::endl; 
+ std::cerr << "[JIT] Type name: " << typeid(DecayedFilter).name() << std::endl; + } + + // First check for none_sample_filter (the only unwrapped case) + if constexpr (std::is_same_v) { + if (debug_output) { std::cerr << "[JIT] Returning: filter_none" << std::endl; } + return "filter_none"; + } + + // All other filters are wrapped in CagraSampleFilterWithQueryIdOffset + // Access the inner filter type via decltype + if constexpr (requires { std::declval().filter; }) { + using InnerFilter = decltype(std::declval().filter); + if constexpr (is_bitset_filter::value || + std::is_same_v> || + std::is_same_v>) { + if (debug_output) { + std::cerr << "[JIT] Returning: filter_bitset (via wrapped filter)" << std::endl; + } + return "filter_bitset"; + } + } + + // Default to none filter for unknown types + if (debug_output) { std::cerr << "[JIT] Returning: filter_none (default/unknown)" << std::endl; } + return "filter_none"; +} + +} // namespace cuvs::neighbors::cagra::detail From b65f59989c5a321a111784685dffc283bb90b0c8 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 18 Feb 2026 17:47:53 +0000 Subject: [PATCH 106/158] occassional failure on dgx spark --- .../modules/generate_jit_lto_kernels.cmake | 166 ++++++++-------- .../cuvs/detail/jit_lto/AlgorithmLauncher.hpp | 13 ++ .../jit_lto/cagra/search_single_cta_tags.hpp | 1 + cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 28 ++- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 176 ++++++++++------- cpp/src/detail/jit_lto/FragmentDatabase.cu | 9 +- .../jit_lto_kernels/apply_filter_kernel.cu.in | 1 - ...mpute_distance_to_child_nodes_kernel.cu.in | 1 - ...e_distance_to_child_nodes_kernel_vpq.cu.in | 1 - .../extern_device_functions.cuh | 6 +- .../cagra/jit_lto_kernels/filter_data.h | 22 +-- .../random_pickup_kernel.cu.in | 1 - .../random_pickup_kernel_vpq.cu.in | 1 - .../search_multi_cta_kernel.cu.in | 6 +- .../search_multi_cta_kernel_jit.cuh | 13 +- .../search_multi_cta_kernel_vpq.cu.in | 7 +- .../search_multi_kernel_jit.cuh | 5 
+- .../search_single_cta_kernel.cu.in | 3 +- .../search_single_cta_kernel_jit.cuh | 5 +- .../search_single_cta_kernel_p.cu.in | 3 +- .../search_single_cta_kernel_p_vpq.cu.in | 5 +- .../search_single_cta_kernel_vpq.cu.in | 5 +- .../search_multi_cta_kernel_launcher_jit.cuh | 127 ++++++++++-- .../detail/cagra/search_multi_kernel.cuh | 40 +++- .../search_multi_kernel_launcher_jit.cuh | 145 ++++++++++++-- .../search_single_cta_kernel_launcher_jit.cuh | 183 +++++++++++++----- .../detail/cagra/shared_launcher_jit.hpp | 15 +- .../{cagra => }/jit_lto_kernels/filter.cu.in | 9 +- .../jit_lto_kernels/filter_bitset.cuh | 18 +- .../detail/jit_lto_kernels/filter_data.h | 28 +++ .../jit_lto_kernels/filter_none.cuh | 10 +- .../ivf_flat_interleaved_scan_jit.cuh | 9 +- .../jit_lto_kernels/filter_bitset.cuh | 64 ------ .../ivf_flat/jit_lto_kernels/filter_none.cuh | 24 --- .../interleaved_scan_kernel.cu.in | 1 + .../interleaved_scan_planner.hpp | 2 +- .../ivf_flat_interleaved_scan_kernel.cuh | 39 ++-- 37 files changed, 756 insertions(+), 436 deletions(-) rename cpp/src/neighbors/detail/{cagra => }/jit_lto_kernels/filter.cu.in (74%) rename cpp/src/neighbors/detail/{cagra => }/jit_lto_kernels/filter_bitset.cuh (79%) create mode 100644 cpp/src/neighbors/detail/jit_lto_kernels/filter_data.h rename cpp/src/neighbors/detail/{cagra => }/jit_lto_kernels/filter_none.cuh (58%) delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 187dfe731e..3e5a8776d2 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -182,14 +182,18 @@ function(generate_jit_lto_kernels target) endforeach() endforeach() + # Generate IVF Flat sample filter fragments using shared implementation foreach(filter_name IN LISTS filter_configs) - 
set(header_file "neighbors/ivf_flat/jit_lto_kernels/${filter_name}.cuh") - - set(kernel_name "${filter_name}") + set(header_file "neighbors/detail/jit_lto_kernels/${filter_name}.cuh") + set(kernel_name "${filter_name}_${idx_abbrev}") set(filename "${generated_kernels_dir}/filter_device_functions/fatbin_${kernel_name}.cu") + set(source_index_type "int64_t") + set(namespace "cuvs::neighbors::detail") + set(filter_name_var "${filter_name}") + set(kernel_name_var "${kernel_name}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in" - "${filename}" @ONLY + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/jit_lto_kernels/filter.cu.in" "${filename}" + @ONLY ) embed_jit_lto_fatbin( FATBIN_TARGET "fatbin_${kernel_name}" @@ -232,6 +236,9 @@ function(generate_jit_lto_kernels target) set(cagra_pq_bits 8) set(cagra_pq_lens 2 4) set(cagra_codebook_type "half") + # CAGRA kernels only use uint32_t as SourceIndexT (matching non-JIT path) + set(cagra_source_index_types "uint32_t") + set(cagra_source_index_abbrevs "ui") # Generate standard descriptor fragments foreach(data_idx IN ITEMS 0 1 2 3) @@ -434,9 +441,12 @@ function(generate_jit_lto_kernels target) ) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - # Regular kernel entrypoint - generate for each combination + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") + # Regular kernel entrypoint set(kernel_name - "search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + 
"search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" @@ -447,10 +457,8 @@ function(generate_jit_lto_kernels target) set(dataset_block_dim "${dataset_block_dim}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") - set(source_index_type "${cagra_index_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") - set(src_idx_abbrev "${cagra_index_abbrev}") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in" "${filename}" @@ -464,9 +472,9 @@ function(generate_jit_lto_kernels target) EMBEDDED_ARRAY "embedded_${kernel_name}" ) - # Persistent kernel entrypoint - generate for each combination + # Persistent kernel entrypoint set(kernel_name - "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" @@ -477,10 +485,8 @@ function(generate_jit_lto_kernels target) set(dataset_block_dim "${dataset_block_dim}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") - set(source_index_type "${cagra_index_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") - set(src_idx_abbrev "${cagra_index_abbrev}") 
configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in" "${filename}" @@ -493,12 +499,12 @@ function(generate_jit_lto_kernels target) EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" EMBEDDED_ARRAY "embedded_${kernel_name}" ) - endforeach() - endforeach() - endforeach() - endforeach() - endforeach() - endforeach() + endforeach() # dataset_block_dim + endforeach() # team_size + endforeach() # merge_idx + endforeach() # topk_idx + endforeach() # metric_idx + endforeach() # data_idx # Generate single_cta VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to # team_size and dataset_block_dim @@ -518,9 +524,12 @@ function(generate_jit_lto_kernels target) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) foreach(pq_len IN LISTS cagra_pq_lens) + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") # Regular VPQ kernel entrypoint set(kernel_name - "search_single_cta_kernel_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + "search_single_cta_kernel_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" @@ -535,10 +544,8 @@ function(generate_jit_lto_kernels target) set(codebook_type "${cagra_codebook_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") - set(source_index_type "${cagra_index_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev 
"${cagra_distance_abbrev}") - set(src_idx_abbrev "${cagra_index_abbrev}") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in" "${filename}" @@ -554,7 +561,7 @@ function(generate_jit_lto_kernels target) # Persistent VPQ kernel entrypoint set(kernel_name - "search_single_cta_kernel_p_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + "search_single_cta_kernel_p_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" @@ -569,10 +576,8 @@ function(generate_jit_lto_kernels target) set(codebook_type "${cagra_codebook_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") - set(source_index_type "${cagra_index_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") - set(src_idx_abbrev "${cagra_index_abbrev}") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in" "${filename}" @@ -585,19 +590,20 @@ function(generate_jit_lto_kernels target) EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" EMBEDDED_ARRAY "embedded_${kernel_name}" ) - endforeach() - endforeach() - endforeach() - endforeach() - endforeach() - endforeach() + endforeach() # pq_len + endforeach() # dataset_block_dim + endforeach() # team_size + endforeach() # merge_idx + endforeach() # topk_idx + endforeach() # data_idx # Generate multi_cta kernel entrypoints Multi_cta kernels don't use topk_by_bitonic_sort or 
# bitonic_sort_and_merge_multi_warps as template parameters (those are handled inside the kernel # based on max_elements) IMPORTANT: Need to generate kernels for all combinations of team_size and # dataset_block_dim because the kernel template uses DescriptorT::kTeamSize and # DescriptorT::kDatasetBlockDim as template parameters when calling - # setup_workspace_standard/compute_distance_standard + # setup_workspace_standard/compute_distance_standard CAGRA only uses uint32_t as SourceIndexT + # (matching non-JIT path) foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) @@ -626,9 +632,12 @@ function(generate_jit_lto_kernels target) endif() foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - # Multi_cta kernel entrypoint - generate for each combination + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") + # Multi_cta kernel entrypoint set(kernel_name - "search_multi_cta_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + "search_multi_cta_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") set(metric "${metric}") @@ -637,10 +646,8 @@ function(generate_jit_lto_kernels target) set(dataset_block_dim "${dataset_block_dim}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") - set(source_index_type "${cagra_index_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") - set(src_idx_abbrev "${cagra_index_abbrev}") configure_file( 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in" "${filename}" @@ -653,23 +660,26 @@ function(generate_jit_lto_kernels target) EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" EMBEDDED_ARRAY "embedded_${kernel_name}" ) - endforeach() - endforeach() - endforeach() - endforeach() + endforeach() # dataset_block_dim + endforeach() # team_size + endforeach() # metric_idx + endforeach() # data_idx # Generate multi_cta VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to # team_size and dataset_block_dim VPQ is supported for all data types (float, half, int8_t, - # uint8_t) + # uint8_t) CAGRA only uses uint32_t as SourceIndexT foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) foreach(pq_len IN LISTS cagra_pq_lens) - # Multi_cta VPQ kernel entrypoint - generate for each combination + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") + # Multi_cta VPQ kernel entrypoint set(kernel_name - "search_multi_cta_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + "search_multi_cta_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") set(metric "L2Expanded") @@ -682,10 +692,8 @@ function(generate_jit_lto_kernels target) set(codebook_type "${cagra_codebook_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") - set(source_index_type "${cagra_index_type}") 
set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") - set(src_idx_abbrev "${cagra_index_abbrev}") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in" "${filename}" @@ -698,10 +706,10 @@ function(generate_jit_lto_kernels target) EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" EMBEDDED_ARRAY "embedded_${kernel_name}" ) - endforeach() - endforeach() - endforeach() - endforeach() + endforeach() # pq_len + endforeach() # dataset_block_dim + endforeach() # team_size + endforeach() # data_idx # Generate multi_kernel kernel entrypoints Multi_kernel has two separate kernels: # random_pickup_kernel and compute_distance_to_child_nodes_kernel @@ -734,7 +742,8 @@ function(generate_jit_lto_kernels target) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - # random_pickup_kernel entrypoint - generate for each combination + # random_pickup_kernel entrypoint - generate for each combination Note: + # random_pickup_kernel doesn't use SourceIndexT, so no loop needed set(kernel_name "random_pickup_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) @@ -760,9 +769,12 @@ function(generate_jit_lto_kernels target) EMBEDDED_ARRAY "embedded_${kernel_name}" ) - # compute_distance_to_child_nodes_kernel entrypoint - generate for each combination + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") + # compute_distance_to_child_nodes_kernel entrypoint set(kernel_name - "compute_distance_to_child_nodes_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + 
"compute_distance_to_child_nodes_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") set(metric "${metric}") @@ -771,10 +783,8 @@ function(generate_jit_lto_kernels target) set(dataset_block_dim "${dataset_block_dim}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") - set(source_index_type "${cagra_index_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") - set(src_idx_abbrev "${cagra_index_abbrev}") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in" "${filename}" @@ -787,10 +797,10 @@ function(generate_jit_lto_kernels target) EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" EMBEDDED_ARRAY "embedded_${kernel_name}" ) - endforeach() - endforeach() - endforeach() - endforeach() + endforeach() # dataset_block_dim + endforeach() # team_size + endforeach() # metric_idx + endforeach() # data_idx # Generate multi_kernel VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to # team_size and dataset_block_dim VPQ is supported for all data types (float, half, int8_t, @@ -831,9 +841,12 @@ function(generate_jit_lto_kernels target) EMBEDDED_ARRAY "embedded_${kernel_name}" ) + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") # compute_distance_to_child_nodes_kernel VPQ entrypoint set(kernel_name - "compute_distance_to_child_nodes_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + 
"compute_distance_to_child_nodes_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") set(metric "L2Expanded") @@ -846,10 +859,8 @@ function(generate_jit_lto_kernels target) set(codebook_type "${cagra_codebook_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") - set(source_index_type "${cagra_index_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") - set(src_idx_abbrev "${cagra_index_abbrev}") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in" "${filename}" @@ -862,23 +873,23 @@ function(generate_jit_lto_kernels target) EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" EMBEDDED_ARRAY "embedded_${kernel_name}" ) - endforeach() - endforeach() - endforeach() - endforeach() + endforeach() # pq_len + endforeach() # dataset_block_dim + endforeach() # team_size + endforeach() # data_idx # Generate apply_filter_kernel entrypoints apply_filter_kernel doesn't use dataset_descriptor, so - # it only needs index types + # it only needs index types CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") set(kernel_name - "apply_filter_kernel_${cagra_index_abbrev}_${cagra_distance_abbrev}_${cagra_index_abbrev}" + "apply_filter_kernel_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") - set(source_index_type "${cagra_index_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") - set(src_idx_abbrev 
"${cagra_index_abbrev}") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in" "${filename}" @@ -892,19 +903,22 @@ function(generate_jit_lto_kernels target) EMBEDDED_ARRAY "embedded_${kernel_name}" ) - # Generate CAGRA sample filter fragments + # Generate CAGRA sample filter fragments using shared implementation CAGRA only uses uint32_t as + # SourceIndexT (matching non-JIT path) set(cagra_filter_configs "filter_none" "filter_bitset") foreach(filter_name IN LISTS cagra_filter_configs) - set(header_file "neighbors/detail/cagra/jit_lto_kernels/${filter_name}.cuh") - set(kernel_name "${filter_name}_${cagra_index_abbrev}") + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") + set(header_file "neighbors/detail/jit_lto_kernels/${filter_name}.cuh") + set(kernel_name "${filter_name}_${src_idx_abbrev}") set(filename "${generated_kernels_dir}/cagra_filter_device_functions/fatbin_${kernel_name}.cu") - set(source_index_type "${cagra_index_type}") + set(namespace "cuvs::neighbors::detail") # Pass both filter_name (for include) and kernel_name (for registration) set(filter_name_var "${filter_name}") set(kernel_name_var "${kernel_name}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/filter.cu.in" - "${filename}" @ONLY + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/jit_lto_kernels/filter.cu.in" "${filename}" + @ONLY ) embed_jit_lto_fatbin( FATBIN_TARGET "fatbin_${kernel_name}" @@ -913,5 +927,5 @@ function(generate_jit_lto_kernels target) EMBEDDED_HEADER "${generated_kernels_dir}/cagra_filter_device_functions/${kernel_name}.h" EMBEDDED_ARRAY "embedded_${kernel_name}" ) - endforeach() + endforeach() # filter_name endfunction() diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp index f376342c4b..3d66739ce2 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp +++ 
b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp @@ -32,7 +32,20 @@ struct AlgorithmLauncher { template void dispatch(cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, Args&&... args) { + // Create array of pointers to arguments + // NOTE: cudaLaunchKernelExC copies the parameter values synchronously before returning, + // so the local array and argument references are safe even though the kernel launch is async void* kernel_args[] = {const_cast(static_cast(&args))...}; + + // Validate that we're not passing null pointers for critical parameters + // (This is a sanity check - actual validation should be done by callers) + for (size_t i = 0; i < sizeof...(args); ++i) { + if (kernel_args[i] == nullptr) { + // Some parameters might legitimately be nullptr, so we just log a warning + // The kernel itself should validate critical pointers + } + } + this->call(stream, grid, block, shared_mem, kernel_args); } diff --git a/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp b/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp index b6ed4c786c..2db9a35583 100644 --- a/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp +++ b/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp @@ -15,6 +15,7 @@ struct tag_uc {}; // uint8_t // Tag types for index types struct tag_idx_ui {}; // uint32_t +struct tag_idx_l {}; // int64_t // Tag types for distance types struct tag_dist_f {}; // float diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 36e0216bcd..540e0977f8 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -48,16 +48,16 @@ AlgorithmLauncher& AlgorithmLauncher::operator=(AlgorithmLauncher&& other) noexc void AlgorithmLauncher::call( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) { + // Validate kernel and library handles before use + if (kernel == 
nullptr) { RAFT_FAIL("AlgorithmLauncher::call - kernel is NULL!"); } + if (library == nullptr) { RAFT_FAIL("AlgorithmLauncher::call - library is NULL!"); } + if (kernel_args == nullptr) { RAFT_FAIL("AlgorithmLauncher::call - kernel_args is NULL!"); } + // Debug: verify kernel is being called - if (kernel != nullptr) { - std::cerr << "[JIT] AlgorithmLauncher::call - kernel is not null, launching with grid=(" - << grid.x << "," << grid.y << "," << grid.z << ") block=(" << block.x << "," - << block.y << "," << block.z << ")" << std::endl; - std::cerr.flush(); - } else { - std::cerr << "[JIT] ERROR: AlgorithmLauncher::call - kernel is NULL!" << std::endl; - std::cerr.flush(); - } + std::cerr << "[JIT] AlgorithmLauncher::call - kernel is not null, launching with grid=(" << grid.x + << "," << grid.y << "," << grid.z << ") block=(" << block.x << "," << block.y << "," + << block.z << ")" << std::endl; + std::cerr.flush(); cudaLaunchAttribute attribute[1]; attribute[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; @@ -74,6 +74,8 @@ void AlgorithmLauncher::call( std::cerr << "[JIT] AlgorithmLauncher::call - About to launch kernel" << std::endl; std::cerr.flush(); + // NOTE: cudaLaunchKernelExC copies parameter values synchronously before returning, + // so the kernel_args array and the values it points to are safe even though the launch is async cudaError_t err = cudaLaunchKernelExC(&config, kernel, kernel_args); if (err != cudaSuccess) { std::cerr << "[JIT] ERROR: cudaLaunchKernelExC failed with: " << cudaGetErrorString(err) << " (" @@ -84,6 +86,14 @@ void AlgorithmLauncher::call( std::cerr.flush(); } RAFT_CUDA_TRY(err); + + // Check for immediate errors after launch (catches parameter issues early) + cudaError_t peek_err = cudaPeekAtLastError(); + if (peek_err != cudaSuccess) { + std::cerr << "[JIT] WARNING: Error detected immediately after kernel launch: " + << cudaGetErrorString(peek_err) << " (" << peek_err << ")" << std::endl; + std::cerr.flush(); + } } void 
AlgorithmLauncher::call_cooperative( diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 3b3f804929..7942460f52 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -25,17 +25,29 @@ void AlgorithmPlanner::add_entrypoint() { + RAFT_LOG_INFO("[JIT FRAGMENT] Looking up entrypoint fragment: %s", this->entrypoint.c_str()); auto entrypoint_fragment = fragment_database().get_fragment(this->entrypoint); if (entrypoint_fragment == nullptr) { RAFT_FAIL("Entrypoint fragment is NULL for: %s", this->entrypoint.c_str()); } + RAFT_LOG_INFO("[JIT FRAGMENT] Entrypoint fragment found: %s (ptr: %p)", + this->entrypoint.c_str(), + entrypoint_fragment); this->fragments.push_back(entrypoint_fragment); } void AlgorithmPlanner::add_device_functions() { for (const auto& device_function_key : this->device_functions) { + RAFT_LOG_INFO("[JIT FRAGMENT] Looking up device function fragment: %s", + device_function_key.c_str()); auto device_function_fragment = fragment_database().get_fragment(device_function_key); + if (device_function_fragment == nullptr) { + RAFT_FAIL("Device function fragment is NULL for: %s", device_function_key.c_str()); + } + RAFT_LOG_INFO("[JIT FRAGMENT] Device function fragment found: %s (ptr: %p)", + device_function_key.c_str(), + device_function_fragment); this->fragments.push_back(device_function_fragment); } } @@ -57,11 +69,24 @@ std::shared_ptr AlgorithmPlanner::get_launcher() static std::mutex cache_mutex; std::lock_guard lock(cache_mutex); if (launchers.count(launch_key) == 0) { + RAFT_LOG_INFO( + "[JIT CACHE] Cache MISS - Building new launcher for key: %s (entrypoint: %s, " + "device_functions: %s)", + launch_key.c_str(), + this->entrypoint.c_str(), + this->get_device_functions_key().c_str()); add_entrypoint(); add_device_functions(); launchers[launch_key] = this->build(); + RAFT_LOG_INFO("[JIT CACHE] Launcher built and cached (kernel handle: %p)", + 
launchers[launch_key]->get_kernel()); } else { - RAFT_LOG_DEBUG("Using cached JIT launcher for entrypoint: %s", this->entrypoint.c_str()); + RAFT_LOG_INFO( + "[JIT CACHE] Cache HIT - Reusing cached launcher for key: %s (entrypoint: %s, kernel handle: " + "%p)", + launch_key.c_str(), + this->entrypoint.c_str(), + launchers[launch_key]->get_kernel()); } return launchers[launch_key]; } @@ -79,51 +104,52 @@ std::shared_ptr AlgorithmPlanner::build() // Generate individual cubin for each device function fragment for debugging // Skip entrypoint fragment as it depends on device functions and will fail to link alone - for (auto& frag : this->fragments) { - // Skip if this is the entrypoint fragment - if (frag->compute_key == this->entrypoint) { continue; } - - nvJitLinkHandle frag_handle; - const char* frag_lopts[] = {"-lto", archs.c_str()}; - auto frag_result = nvJitLinkCreate(&frag_handle, 2, frag_lopts); - check_nvjitlink_result(frag_handle, frag_result); - - frag->add_to(frag_handle); - - frag_result = nvJitLinkComplete(frag_handle); - check_nvjitlink_result(frag_handle, frag_result); - - size_t frag_cubin_size; - frag_result = nvJitLinkGetLinkedCubinSize(frag_handle, &frag_cubin_size); - check_nvjitlink_result(frag_handle, frag_result); - - if (frag_cubin_size > 0) { - std::unique_ptr frag_cubin{new char[frag_cubin_size]}; - frag_result = nvJitLinkGetLinkedCubin(frag_handle, frag_cubin.get()); - check_nvjitlink_result(frag_handle, frag_result); - - // Save individual fragment cubin - std::string frag_cubin_path = "/tmp/fragment_cubin_" + frag->compute_key + ".cubin"; - std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), '/', '_'); - std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), ':', '_'); - std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), '<', '_'); - std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), '>', '_'); - std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), ' ', '_'); - FILE* frag_f = 
fopen(frag_cubin_path.c_str(), "wb"); - if (frag_f) { - size_t written = fwrite(frag_cubin.get(), 1, frag_cubin_size, frag_f); - fclose(frag_f); - if (written == frag_cubin_size) { - std::cerr << "[JIT] Saved fragment cubin: " << frag_cubin_path - << " (size: " << frag_cubin_size << " bytes)" << std::endl; - std::cerr << "[JIT] Run: cuobjdump --dump-elf-symbols " << frag_cubin_path << std::endl; - } - } - } - - frag_result = nvJitLinkDestroy(&frag_handle); - RAFT_EXPECTS(frag_result == NVJITLINK_SUCCESS, "nvJitLinkDestroy failed for fragment"); - } + // for (auto& frag : this->fragments) { + // // Skip if this is the entrypoint fragment + // if (frag->compute_key == this->entrypoint) { continue; } + + // nvJitLinkHandle frag_handle; + // const char* frag_lopts[] = {"-lto", archs.c_str()}; + // auto frag_result = nvJitLinkCreate(&frag_handle, 2, frag_lopts); + // check_nvjitlink_result(frag_handle, frag_result); + + // frag->add_to(frag_handle); + + // frag_result = nvJitLinkComplete(frag_handle); + // check_nvjitlink_result(frag_handle, frag_result); + + // size_t frag_cubin_size; + // frag_result = nvJitLinkGetLinkedCubinSize(frag_handle, &frag_cubin_size); + // check_nvjitlink_result(frag_handle, frag_result); + + // if (frag_cubin_size > 0) { + // std::unique_ptr frag_cubin{new char[frag_cubin_size]}; + // frag_result = nvJitLinkGetLinkedCubin(frag_handle, frag_cubin.get()); + // check_nvjitlink_result(frag_handle, frag_result); + + // // Save individual fragment cubin + // std::string frag_cubin_path = "/tmp/fragment_cubin_" + frag->compute_key + ".cubin"; + // std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), '/', '_'); + // std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), ':', '_'); + // std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), '<', '_'); + // std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), '>', '_'); + // std::replace(frag_cubin_path.begin(), frag_cubin_path.end(), ' ', '_'); + // FILE* frag_f = 
fopen(frag_cubin_path.c_str(), "wb"); + // if (frag_f) { + // size_t written = fwrite(frag_cubin.get(), 1, frag_cubin_size, frag_f); + // fclose(frag_f); + // if (written == frag_cubin_size) { + // std::cerr << "[JIT] Saved fragment cubin: " << frag_cubin_path + // << " (size: " << frag_cubin_size << " bytes)" << std::endl; + // std::cerr << "[JIT] Run: cuobjdump --dump-elf-symbols " << frag_cubin_path << + // std::endl; + // } + // } + // } + + // frag_result = nvJitLinkDestroy(&frag_handle); + // RAFT_EXPECTS(frag_result == NVJITLINK_SUCCESS, "nvJitLinkDestroy failed for fragment"); + // } // Load the generated LTO IR and link them together nvJitLinkHandle handle; @@ -150,35 +176,35 @@ std::shared_ptr AlgorithmPlanner::build() RAFT_EXPECTS(result == NVJITLINK_SUCCESS, "nvJitLinkDestroy failed"); // Save cubin to disk for inspection with cuobjdump - std::string cubin_path = "/tmp/linked_cubin_" + this->entrypoint + ".cubin"; - // Sanitize filename (replace special chars) - std::replace(cubin_path.begin(), cubin_path.end(), '/', '_'); - std::replace(cubin_path.begin(), cubin_path.end(), ':', '_'); - std::replace(cubin_path.begin(), cubin_path.end(), '<', '_'); - std::replace(cubin_path.begin(), cubin_path.end(), '>', '_'); - std::replace(cubin_path.begin(), cubin_path.end(), ' ', '_'); - FILE* f = fopen(cubin_path.c_str(), "wb"); - if (f) { - size_t written = fwrite(cubin.get(), 1, cubin_size, f); - fclose(f); - if (written == cubin_size) { - std::cerr << "[JIT] =========================================" << std::endl; - std::cerr << "[JIT] Saved linked cubin to: " << cubin_path << " (size: " << cubin_size - << " bytes)" << std::endl; - std::cerr << "[JIT] Run: cuobjdump --dump-elf-symbols " << cubin_path - << " to see kernel symbols" << std::endl; - std::cerr << "[JIT] =========================================" << std::endl; - std::cerr.flush(); - } else { - std::cerr << "[JIT] WARNING: Failed to write full cubin (wrote " << written << " of " - << cubin_size << " 
bytes)" << std::endl; - std::cerr.flush(); - } - } else { - std::cerr << "[JIT] WARNING: Failed to open cubin file for writing: " << cubin_path - << " (errno: " << errno << ")" << std::endl; - std::cerr.flush(); - } + // std::string cubin_path = "/tmp/linked_cubin_" + this->entrypoint + ".cubin"; + // // Sanitize filename (replace special chars) + // std::replace(cubin_path.begin(), cubin_path.end(), '/', '_'); + // std::replace(cubin_path.begin(), cubin_path.end(), ':', '_'); + // std::replace(cubin_path.begin(), cubin_path.end(), '<', '_'); + // std::replace(cubin_path.begin(), cubin_path.end(), '>', '_'); + // std::replace(cubin_path.begin(), cubin_path.end(), ' ', '_'); + // FILE* f = fopen(cubin_path.c_str(), "wb"); + // if (f) { + // size_t written = fwrite(cubin.get(), 1, cubin_size, f); + // fclose(f); + // if (written == cubin_size) { + // std::cerr << "[JIT] =========================================" << std::endl; + // std::cerr << "[JIT] Saved linked cubin to: " << cubin_path << " (size: " << cubin_size + // << " bytes)" << std::endl; + // std::cerr << "[JIT] Run: cuobjdump --dump-elf-symbols " << cubin_path + // << " to see kernel symbols" << std::endl; + // std::cerr << "[JIT] =========================================" << std::endl; + // std::cerr.flush(); + // } else { + // std::cerr << "[JIT] WARNING: Failed to write full cubin (wrote " << written << " of " + // << cubin_size << " bytes)" << std::endl; + // std::cerr.flush(); + // } + // } else { + // std::cerr << "[JIT] WARNING: Failed to open cubin file for writing: " << cubin_path + // << " (errno: " << errno << ")" << std::endl; + // std::cerr.flush(); + // } // cubin is linked, so now load it cudaLibrary_t library; diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index 0f70ac16c0..0322c978e3 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -7,6 +7,7 @@ #include #include +#include 
FragmentDatabase::FragmentDatabase() {} @@ -32,7 +33,13 @@ FragmentEntry* FragmentDatabase::get_fragment(std::string const& key) auto& db = fragment_database(); auto val = db.cache.find(key); RAFT_EXPECTS(val != db.cache.end(), "FragmentDatabase: Key not found: %s", key.c_str()); - return val->second.get(); + auto* fragment = val->second.get(); + if (fragment == nullptr) { + RAFT_LOG_WARN("[JIT FRAGMENT] Fragment key exists but entry is NULL: %s (cache size: %zu)", + key.c_str(), + db.cache.size()); + } + return fragment; } void registerFatbinFragment(std::string const& algo, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in index db47670809..f102f5386b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in @@ -8,7 +8,6 @@ #ifdef BUILD_KERNEL #include -#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index e7b61a4a7e..54eaefbd04 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -9,7 +9,6 @@ #include #include -#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in index efec282811..0d16b4a411 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in +++ 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in @@ -9,7 +9,6 @@ #include #include -#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh index 3437f20d3b..b4455df0a3 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -111,6 +111,10 @@ extern __device__ DistanceT compute_distance_vpq( const typename dataset_descriptor_base_t::args_t args, IndexT dataset_index); +} // namespace cuvs::neighbors::cagra::detail + +namespace cuvs::neighbors::detail { + // Sample filter extern function - linked separately via JIT LTO // Takes 3 params: query_id, node_id, and filter_data (void* pointer to filter-specific data) // For none_filter: filter_data can be nullptr @@ -118,4 +122,4 @@ extern __device__ DistanceT compute_distance_vpq( template extern __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* filter_data); -} // namespace cuvs::neighbors::cagra::detail +} // namespace cuvs::neighbors::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h index b671eda513..178ed6dd9b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h @@ -5,23 +5,5 @@ #pragma once -#include -#include - -namespace cuvs::neighbors::cagra::detail { - -// Structure to hold bitset filter data -// This is passed as void* to the extern sample_filter function -template -struct bitset_filter_data_t { - uint32_t* bitset_ptr; // Pointer to bitset data in global memory - SourceIndexT bitset_len; // Length of bitset array - SourceIndexT original_nbits; // Original number 
of bits - - __device__ bitset_filter_data_t(uint32_t* ptr, SourceIndexT len, SourceIndexT nbits) - : bitset_ptr(ptr), bitset_len(len), original_nbits(nbits) - { - } -}; - -} // namespace cuvs::neighbors::cagra::detail +// Use the shared filter_data.h +#include "../../jit_lto_kernels/filter_data.h" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in index f00927694c..dca6be6b90 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -9,7 +9,6 @@ #include #include -#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in index ffffe7b3ed..b2bd39feca 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in @@ -9,7 +9,6 @@ #include #include -#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index 22ebdc1109..c14c04d9b3 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -12,19 +12,15 @@ #include #include -#include #include -#include #include -#include -#include namespace cuvs::neighbors::cagra::detail::multi_cta_search { // Instantiate the search_kernel_jit function with concrete descriptor type using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; template __global__ __launch_bounds__(1024, 1) void 
search_kernel_jit( - @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, uint32_t*, @source_index_type@, @source_index_type@); + @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh index 783c0fb9c9..622e1fb6b0 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh @@ -35,6 +35,7 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v; // sample_filter is declared in extern_device_functions.cuh +using cuvs::neighbors::detail::sample_filter; // JIT versions of compute_distance_to_random_nodes and compute_distance_to_child_nodes // are now shared in device_common_jit.cuh - use fully qualified names @@ -68,6 +69,7 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( const uint32_t min_iteration, const uint32_t max_iteration, uint32_t* const num_executed_iterations, /* 
stats */ + const uint32_t query_id_offset, // Offset to add to query_id when calling filter uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) SourceIndexT bitset_len, // Bitset length SourceIndexT original_nbits) @@ -303,9 +305,9 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( if (parent_indices_buffer[p] != invalid_index) { const auto parent_id = result_indices_buffer[parent_indices_buffer[p]] & ~index_msb_1_mask; // Construct filter_data struct (bitset data is in global memory) - cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( + cuvs::neighbors::detail::bitset_filter_data_t filter_data( bitset_ptr, bitset_len, original_nbits); - if (!sample_filter(query_id, + if (!sample_filter(query_id + query_id_offset, to_source_index(parent_id), bitset_ptr != nullptr ? &filter_data : nullptr)) { // If the parent must not be in the resulting top-k list, remove from the parent list @@ -325,10 +327,11 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( if (index == invalid_index) { continue; } index &= ~index_msb_1_mask; // Construct filter_data struct (bitset data is in global memory) - cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( + cuvs::neighbors::detail::bitset_filter_data_t filter_data( bitset_ptr, bitset_len, original_nbits); - if (!sample_filter( - query_id, to_source_index(index), bitset_ptr != nullptr ? &filter_data : nullptr)) { + if (!sample_filter(query_id + query_id_offset, + to_source_index(index), + bitset_ptr != nullptr ? 
&filter_data : nullptr)) { result_indices_buffer[i] = invalid_index; result_distances_buffer[i] = utils::get_max_value(); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in index 0de95f75c8..07e48c7de3 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in @@ -11,20 +11,15 @@ #include // For pickup_next_parent and topk_by_bitonic_sort_wrapper_* #include -#include #include -#include #include -#include -#include -#include namespace cuvs::neighbors::cagra::detail::multi_cta_search { // Instantiate the search_kernel_jit function with concrete VPQ descriptor type using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; template __global__ __launch_bounds__(1024, 1) void search_kernel_jit( - @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, uint32_t*, @source_index_type@, @source_index_type@); + @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::multi_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh index 085abbec00..a62ccc9c9b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh @@ -304,6 +304,7 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( // JIT version of apply_filter_kernel - uses extern sample_filter function // Bitset data is passed as kernel parameters (matching non-JIT where filter object contains // bitset_view) The bitset data is in global memory (not shared memory), just like non-JIT +using cuvs::neighbors::detail::sample_filter; template @@ -334,10 +335,10 @@ RAFT_KERNEL apply_filter_kernel_jit( : source_indices_ptr[result_indices_ptr[index]]; // Construct filter_data struct in registers (bitset data is in global memory) - cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( + cuvs::neighbors::detail::bitset_filter_data_t filter_data( bitset_ptr, bitset_len, original_nbits); - if (!cuvs::neighbors::cagra::detail::sample_filter( + if (!sample_filter( query_id_offset + j, node_id, bitset_ptr != nullptr ? 
&filter_data : nullptr)) { result_indices_ptr[index] = utils::get_max_value(); result_distances_ptr[index] = utils::get_max_value(); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in index d8e1e0ee94..286193c2f0 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -9,14 +9,13 @@ #include #include -#include namespace cuvs::neighbors::cagra::detail::single_cta_search { // Instantiate the search_kernel_jit function with concrete descriptor type using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); + uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const 
desc_t*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh index 5675f1e8e1..bcc1c7bc69 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -77,6 +77,7 @@ using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v; // Sample filter extern function // sample_filter is declared in extern_device_functions.cuh +using cuvs::neighbors::detail::sample_filter; // JIT versions of compute_distance_to_random_nodes and compute_distance_to_child_nodes // are now shared in device_common_jit.cuh - use fully qualified names @@ -370,7 +371,7 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( if (parent_list_buffer[p] != invalid_index) { const auto parent_id = result_indices_buffer[parent_list_buffer[p]] & ~index_msb_1_mask; // Construct filter_data struct (bitset data is in global memory) - cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( + cuvs::neighbors::detail::bitset_filter_data_t filter_data( bitset_ptr, bitset_len, original_nbits); if (!sample_filter(query_id + query_id_offset, to_source_index(parent_id), @@ -393,7 +394,7 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( for (unsigned i = threadIdx.x; i < internal_topk + search_width * graph_degree; i += blockDim.x) { const auto node_id = result_indices_buffer[i] & ~index_msb_1_mask; // Construct filter_data struct (bitset data is in global memory) - cuvs::neighbors::cagra::detail::bitset_filter_data_t filter_data( + cuvs::neighbors::detail::bitset_filter_data_t filter_data( bitset_ptr, bitset_len, original_nbits); if (node_id != (invalid_index & ~index_msb_1_mask) && !sample_filter(query_id + query_id_offset, diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in index 2fbae21acc..fcf335ecef 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in @@ -9,14 +9,13 @@ #include #include -#include namespace cuvs::neighbors::cagra::detail::single_cta_search { // Instantiate the search_kernel_p_jit function with concrete descriptor type using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); + worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in index ad8e1792a9..372aa4e9cb 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in @@ -8,15 +8,14 @@ #ifdef BUILD_KERNEL #include -#include -#include +#include namespace cuvs::neighbors::cagra::detail::single_cta_search { // Instantiate the search_kernel_p_jit function with concrete VPQ descriptor type using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); + worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in index 3f4f13e9ca..462a3c3812 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in @@ -8,15 +8,14 @@ #ifdef BUILD_KERNEL #include -#include -#include +#include namespace cuvs::neighbors::cagra::detail::single_cta_search { // Instantiate the search_kernel_jit function with concrete VPQ descriptor type using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); + uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace 
cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 3d5d0a6f00..1811a60eb5 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -30,13 +30,6 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { -// Import shared JIT helper functions -using cuvs::neighbors::cagra::detail::get_data_type_tag; -using cuvs::neighbors::cagra::detail::get_distance_type_tag; -using cuvs::neighbors::cagra::detail::get_index_type_tag; -using cuvs::neighbors::cagra::detail::get_sample_filter_name; -using cuvs::neighbors::cagra::detail::get_source_index_type_tag; - // JIT version of select_and_run for multi_cta template ) { - // Try to extract bitset data from the filter - if constexpr (std::is_same_v< - SampleFilterT, - cuvs::neighbors::filtering::bitset_filter>) { - auto bitset_view = sample_filter.view(); + // Check if it has the wrapper members (CagraSampleFilterWithQueryIdOffset) + if constexpr (requires { + sample_filter.filter; + sample_filter.offset; + }) { + using InnerFilter = decltype(sample_filter.filter); + // Always extract offset for wrapped filters + query_id_offset = sample_filter.offset; + RAFT_LOG_INFO("Extracted query_id_offset: %u", query_id_offset); + if constexpr (is_bitset_filter::value) { + // Extract bitset data for bitset_filter (works for any bitset_filter instantiation) + auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); + RAFT_LOG_INFO("Extracted bitset data: bitset_ptr=%p, bitset_len=%zu, original_nbits=%zu", + bitset_ptr, + static_cast(bitset_len), + static_cast(original_nbits)); + } else { + RAFT_LOG_INFO("InnerFilter is not 
bitset_filter, skipping bitset extraction"); } + } else { + RAFT_LOG_INFO("Filter does not have wrapper members (.filter/.offset), skipping extraction"); } // Create planner with tags @@ -126,7 +142,9 @@ void select_and_run_jit( dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); - planner.add_sample_filter_device_function(get_sample_filter_name()); + std::string filter_name = get_sample_filter_name(); + planner.add_sample_filter_device_function(filter_name); + RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_CTA filter name: %s", filter_name.c_str()); // Get launcher using the planner's entrypoint name and fragment key auto params = make_fragment_key(); @@ -134,6 +152,9 @@ void select_and_run_jit( if (!launcher) { RAFT_FAIL("Failed to get JIT launcher"); } + RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_CTA launcher obtained (kernel handle: %p)", + launcher->get_kernel()); + // Verify kernel handle is valid cudaKernel_t kernel_handle = launcher->get_kernel(); if (kernel_handle == nullptr) { RAFT_FAIL("JIT launcher has null kernel handle"); } @@ -173,6 +194,11 @@ void select_and_run_jit( const dataset_descriptor_base_t* dev_desc_base = dataset_desc.dev_ptr(stream); const auto* dev_desc = dev_desc_base; + if (dev_desc == nullptr) { RAFT_FAIL("Device descriptor pointer is NULL"); } + + // Note: dataset_desc is passed by const reference, so it stays alive for the duration of this + // function The descriptor's state is managed by a shared_ptr internally, so no need to explicitly + // keep it alive // Cast size_t/int64_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly @@ -188,6 +214,40 @@ void select_and_run_jit( const uint32_t max_iterations_u32 = static_cast(ps.max_iterations); const unsigned num_random_samplings_u = static_cast(ps.num_random_samplings); + RAFT_LOG_INFO( + "[JIT LAUNCHER] MULTI_CTA dispatch parameters: graph_degree=%u, traversed_hash_bitlen=%u, " + "itopk_size=%u, bitset_len=%u, 
original_nbits=%u, query_id_offset=%u", + graph_degree_u32, + traversed_hash_bitlen_u32, + itopk_size_u32, + static_cast(bitset_len), + static_cast(original_nbits), + query_id_offset); + + // Validate critical pointers before dispatch + if (topk_indices_ptr == nullptr) { RAFT_FAIL("MULTI_CTA: topk_indices_ptr is NULL"); } + if (topk_distances_ptr == nullptr) { RAFT_FAIL("MULTI_CTA: topk_distances_ptr is NULL"); } + if (graph.data_handle() == nullptr) { RAFT_FAIL("MULTI_CTA: graph.data_handle() is NULL"); } + if (dev_desc == nullptr) { RAFT_FAIL("MULTI_CTA: dev_desc is NULL"); } + RAFT_LOG_INFO( + "[JIT LAUNCHER] MULTI_CTA pointer validation passed: topk_indices=%p, topk_distances=%p, " + "graph=%p, dev_desc=%p", + topk_indices_ptr, + topk_distances_ptr, + graph.data_handle(), + dev_desc); + + // Log all critical parameters before dispatch to help diagnose issues + RAFT_LOG_INFO( + "[JIT LAUNCHER] MULTI_CTA pre-dispatch: num_queries=%u, topk=%u, num_cta_per_query=%u, " + "max_elements=%u, graph.extent(0)=%zu, graph.extent(1)=%zu", + num_queries, + topk, + num_cta_per_query, + max_elements, + graph.extent(0), + graph.extent(1)); + launcher->dispatch(stream, grid_dims, block_dims, @@ -211,12 +271,47 @@ void select_and_run_jit( min_iterations_u32, // Cast size_t to uint32_t max_iterations_u32, // Cast size_t to uint32_t num_executed_iterations, + query_id_offset, // Offset to add to query_id when calling filter bitset_ptr, bitset_len, original_nbits); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + // Check for launch errors immediately + cudaError_t launch_err = cudaPeekAtLastError(); + if (launch_err != cudaSuccess) { + RAFT_LOG_ERROR("[JIT LAUNCHER] MULTI_CTA kernel launch error detected: %s (error code: %d)", + cudaGetErrorString(launch_err), + launch_err); + RAFT_CUDA_TRY(launch_err); + } + + // Synchronize to catch kernel execution errors before they propagate + // This ensures the kernel completes before we return, 
preventing parameter lifetime issues + cudaError_t sync_err = cudaStreamSynchronize(stream); + if (sync_err != cudaSuccess) { + RAFT_LOG_ERROR("[JIT LAUNCHER] MULTI_CTA kernel execution failed: %s (error code: %d)", + cudaGetErrorString(sync_err), + sync_err); + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_CTA parameters: graph_degree=%u, itopk_size=%u, num_queries=%u, " + "topk=%u, num_cta_per_query=%u, max_elements=%u", + graph_degree_u32, + itopk_size_u32, + num_queries, + topk, + num_cta_per_query, + max_elements); + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_CTA pointers: topk_indices=%p, topk_distances=%p, graph=%p, " + "dev_desc=%p", + topk_indices_ptr, + topk_distances_ptr, + graph.data_handle(), + dev_desc); + RAFT_CUDA_TRY(sync_err); + } + + RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_CTA kernel completed successfully"); } } // namespace cuvs::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index f66fe203c4..512d17896d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -91,6 +91,7 @@ void get_value(T* const host_ptr, const T* const dev_ptr, cudaStream_t cuda_stre template auto get_value(const T* const dev_ptr, cudaStream_t stream) -> T { + if (dev_ptr == nullptr) { RAFT_FAIL("get_value: dev_ptr is NULL"); } T value; RAFT_CUDA_TRY(cudaMemcpyAsync(&value, dev_ptr, sizeof(value), cudaMemcpyDefault, stream)); RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); @@ -755,6 +756,9 @@ struct search RAFT_LOG_DEBUG("# topk_workspace_size: %lu", topk_workspace_size); topk_workspace.resize(topk_workspace_size, raft::resource::get_cuda_stream(res)); terminate_flag.resize(1, raft::resource::get_cuda_stream(res)); + if (terminate_flag.data() == nullptr) { + RAFT_FAIL("Failed to allocate terminate_flag: resize returned nullptr"); + } hashmap.resize(hashmap_size, 
raft::resource::get_cuda_stream(res)); } @@ -924,11 +928,34 @@ struct search break; } - if (iter + 1 >= min_iterations) { set_value(terminate_flag.data(), 1, stream); } + if (iter + 1 >= min_iterations) { + if (terminate_flag.data() == nullptr) { + RAFT_FAIL("terminate_flag.data() is NULL before set_value at iteration %zu", iter + 1); + } + set_value(terminate_flag.data(), 1, stream); + } // pickup parent nodes uint32_t _small_hash_bitlen = 0; if ((iter + 1) % small_hash_reset_interval == 0) { _small_hash_bitlen = small_hash_bitlen; } + + // Validate all pointers before passing to kernel to prevent memory corruption + if (terminate_flag.data() == nullptr) { + RAFT_FAIL("terminate_flag.data() is NULL before pickup_next_parents at iteration %zu", + iter + 1); + } + if (result_indices.data() == nullptr) { + RAFT_FAIL("result_indices.data() is NULL before pickup_next_parents at iteration %zu", + iter + 1); + } + if (hashmap.data() == nullptr) { + RAFT_FAIL("hashmap.data() is NULL before pickup_next_parents at iteration %zu", iter + 1); + } + if (parent_node_list.data() == nullptr) { + RAFT_FAIL("parent_node_list.data() is NULL before pickup_next_parents at iteration %zu", + iter + 1); + } + pickup_next_parents(result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, result_buffer_allocation_size, itopk_size, @@ -943,9 +970,14 @@ struct search stream); // termination (2) - if (iter + 1 >= min_iterations && get_value(terminate_flag.data(), stream)) { - iter++; - break; + if (iter + 1 >= min_iterations) { + if (terminate_flag.data() == nullptr) { + RAFT_FAIL("terminate_flag.data() is NULL at iteration %zu", iter + 1); + } + if (get_value(terminate_flag.data(), stream)) { + iter++; + break; + } } // Compute distance to child nodes that are adjacent to the parent node diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh index 899318bed3..86220dc6ae 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -32,13 +32,6 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -// Import shared JIT helper functions -using cuvs::neighbors::cagra::detail::get_data_type_tag; -using cuvs::neighbors::cagra::detail::get_distance_type_tag; -using cuvs::neighbors::cagra::detail::get_index_type_tag; -using cuvs::neighbors::cagra::detail::get_sample_filter_name; -using cuvs::neighbors::cagra::detail::get_source_index_type_tag; - // JIT version of random_pickup template void random_pickup_jit(const dataset_descriptor_host& dataset_desc, @@ -56,6 +49,11 @@ void random_pickup_jit(const dataset_descriptor_host& std::uint32_t hash_bitlen, cudaStream_t cuda_stream) { + RAFT_LOG_INFO( + "[JIT LAUNCHER] Entering MULTI_KERNEL launcher (random_pickup: num_queries=%zu, " + "num_pickup=%zu)", + num_queries, + num_pickup); // Create planner with tags using DataTag = decltype(get_data_type_tag()); using IndexTag = decltype(get_index_type_tag()); @@ -93,6 +91,10 @@ void random_pickup_jit(const dataset_descriptor_host& // Get the device descriptor pointer const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); + // Cast size_t parameters to match kernel signature exactly + // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly + const uint32_t ldr_u32 = static_cast(ldr); + // Dispatch kernel via launcher launcher->dispatch(cuda_stream, grid_size, @@ -107,11 +109,38 @@ void random_pickup_jit(const dataset_descriptor_host& num_seeds, result_indices_ptr, result_distances_ptr, - ldr, + ldr_u32, // Cast size_t to uint32_t visited_hashmap_ptr, hash_bitlen); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + // Check for launch errors immediately + cudaError_t launch_err = cudaPeekAtLastError(); + if (launch_err != cudaSuccess) { + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_KERNEL (random_pickup) kernel launch error detected: 
%s (error code: " + "%d)", + cudaGetErrorString(launch_err), + launch_err); + RAFT_CUDA_TRY(launch_err); + } + + // Synchronize to catch kernel execution errors before they propagate + cudaError_t sync_err = cudaStreamSynchronize(cuda_stream); + if (sync_err != cudaSuccess) { + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_KERNEL (random_pickup) kernel execution failed: %s (error code: %d)", + cudaGetErrorString(sync_err), + sync_err); + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_KERNEL (random_pickup) parameters: num_queries=%zu, num_pickup=%zu, " + "ldr=%u", + num_queries, + num_pickup, + ldr_u32); + RAFT_CUDA_TRY(sync_err); + } + + RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) kernel completed successfully"); } // JIT version of compute_distance_to_child_nodes @@ -140,6 +169,11 @@ void compute_distance_to_child_nodes_jit( SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream) { + RAFT_LOG_INFO( + "[JIT LAUNCHER] Entering MULTI_KERNEL launcher (compute_distance_to_child_nodes: " + "num_queries=%u, search_width=%u)", + num_queries, + search_width); // Create planner with tags using DataTag = decltype(get_data_type_tag()); using IndexTag = decltype(get_index_type_tag()); @@ -200,7 +234,36 @@ void compute_distance_to_child_nodes_jit( ldd, sample_filter); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + // Check for launch errors immediately + cudaError_t launch_err = cudaPeekAtLastError(); + if (launch_err != cudaSuccess) { + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_KERNEL (compute_distance_to_child_nodes) kernel launch error detected: " + "%s (error code: %d)", + cudaGetErrorString(launch_err), + launch_err); + RAFT_CUDA_TRY(launch_err); + } + + // Synchronize to catch kernel execution errors before they propagate + cudaError_t sync_err = cudaStreamSynchronize(cuda_stream); + if (sync_err != cudaSuccess) { + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_KERNEL (compute_distance_to_child_nodes) kernel execution failed: %s " + "(error code: %d)", + 
cudaGetErrorString(sync_err), + sync_err); + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_KERNEL (compute_distance_to_child_nodes) parameters: num_queries=%u, " + "search_width=%u, graph_degree=%u", + num_queries, + search_width, + graph_degree); + RAFT_CUDA_TRY(sync_err); + } + + RAFT_LOG_INFO( + "[JIT LAUNCHER] MULTI_KERNEL (compute_distance_to_child_nodes) kernel completed successfully"); } // JIT version of apply_filter @@ -215,22 +278,39 @@ void apply_filter_jit(const SourceIndexT* source_indices_ptr, SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream) { + RAFT_LOG_INFO( + "[JIT LAUNCHER] Entering MULTI_KERNEL launcher (apply_filter: num_queries=%u, " + "result_buffer_size=%u)", + num_queries, + result_buffer_size); // Extract bitset data from filter object (if it's a bitset_filter) uint32_t* bitset_ptr = nullptr; SourceIndexT bitset_len = 0; SourceIndexT original_nbits = 0; - if constexpr (!std::is_same_v) { - // Try to extract bitset data from the filter - // bitset_filter has a view() method that returns the bitset_view - if constexpr (std::is_same_v< - SAMPLE_FILTER_T, - cuvs::neighbors::filtering::bitset_filter>) { - auto bitset_view = sample_filter.view(); + // Check if it has the wrapper members (CagraSampleFilterWithQueryIdOffset) + // Note: query_id_offset is already a parameter to this function, so we don't extract it here + if constexpr (requires { + sample_filter.filter; + sample_filter.offset; + }) { + using InnerFilter = decltype(sample_filter.filter); + RAFT_LOG_INFO("Filter has wrapper members, query_id_offset parameter: %u", query_id_offset); + if constexpr (is_bitset_filter::value) { + // Extract bitset data for bitset_filter (works for any bitset_filter instantiation) + auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); + RAFT_LOG_INFO("Extracted bitset data: bitset_ptr=%p, bitset_len=%zu, 
original_nbits=%zu", + bitset_ptr, + static_cast(bitset_len), + static_cast(original_nbits)); + } else { + RAFT_LOG_INFO("InnerFilter is not bitset_filter, skipping bitset extraction"); } + } else { + RAFT_LOG_INFO("Filter does not have wrapper members (.filter/.offset), skipping extraction"); } // Create planner with tags @@ -275,7 +355,36 @@ void apply_filter_jit(const SourceIndexT* source_indices_ptr, bitset_len, original_nbits); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + // Check for launch errors immediately + cudaError_t launch_err = cudaPeekAtLastError(); + if (launch_err != cudaSuccess) { + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_KERNEL (apply_filter) kernel launch error detected: %s (error code: " + "%d)", + cudaGetErrorString(launch_err), + launch_err); + RAFT_CUDA_TRY(launch_err); + } + + // Synchronize to catch kernel execution errors before they propagate + cudaError_t sync_err = cudaStreamSynchronize(cuda_stream); + if (sync_err != cudaSuccess) { + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_KERNEL (apply_filter) kernel execution failed: %s (error code: %d)", + cudaGetErrorString(sync_err), + sync_err); + RAFT_LOG_ERROR( + "[JIT LAUNCHER] MULTI_KERNEL (apply_filter) parameters: num_queries=%u, " + "result_buffer_size=%u, bitset_len=%u, original_nbits=%u, query_id_offset=%u", + num_queries, + result_buffer_size, + static_cast(bitset_len), + static_cast(original_nbits), + query_id_offset); + RAFT_CUDA_TRY(sync_err); + } + + RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_KERNEL (apply_filter) kernel completed successfully"); } } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index c895e39492..54e51d32ec 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -49,13 +49,6 @@ namespace 
cuvs::neighbors::cagra::detail::single_cta_search { -// Import shared JIT helper functions -using cuvs::neighbors::cagra::detail::get_data_type_tag; -using cuvs::neighbors::cagra::detail::get_distance_type_tag; -using cuvs::neighbors::cagra::detail::get_index_type_tag; -using cuvs::neighbors::cagra::detail::get_sample_filter_name; -using cuvs::neighbors::cagra::detail::get_source_index_type_tag; - // The launcher uses types from search_single_cta_kernel-inl.cuh (worker_handle_t, job_desc_t) // The JIT kernel headers define _jit versions that are compatible @@ -382,19 +375,30 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn original_nbits = 0; uint32_t query_id_offset = 0; - if constexpr (!std::is_same_v) { - // All non-none filters are wrapped in CagraSampleFilterWithQueryIdOffset - // Access .filter and .offset directly - query_id_offset = sample_filter.offset; + // Check if it has the wrapper members (CagraSampleFilterWithQueryIdOffset) + if constexpr (requires { + sample_filter.filter; + sample_filter.offset; + }) { using InnerFilter = decltype(sample_filter.filter); - if constexpr (std::is_same_v< - InnerFilter, - cuvs::neighbors::filtering::bitset_filter>) { + // Always extract offset for wrapped filters + query_id_offset = sample_filter.offset; + RAFT_LOG_INFO("Extracted query_id_offset: %u", query_id_offset); + if constexpr (is_bitset_filter::value) { + // Extract bitset data for bitset_filter (works for any bitset_filter instantiation) auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); + RAFT_LOG_INFO("Extracted bitset data: bitset_ptr=%p, bitset_len=%zu, original_nbits=%zu", + bitset_ptr, + static_cast(bitset_len), + static_cast(original_nbits)); + } else { + RAFT_LOG_INFO("InnerFilter is not bitset_filter, skipping bitset extraction"); } + } else { + RAFT_LOG_INFO("Filter 
does not have wrapper members (.filter/.offset), skipping extraction"); } // set kernel launch parameters @@ -437,6 +441,18 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn // Get the device descriptor pointer - kernel will use the concrete type from template const auto* dev_desc = dataset_desc.get().dev_ptr(stream); + // Cast size_t/int64_t parameters to match kernel signature exactly + // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly + const uint32_t graph_degree_u32 = static_cast(graph.extent(1)); + const uint32_t hash_bitlen_u32 = static_cast(hash_bitlen); + const uint32_t small_hash_bitlen_u32 = static_cast(small_hash_bitlen); + const uint32_t small_hash_reset_interval_u32 = static_cast(small_hash_reset_interval); + const uint32_t itopk_size_u32 = static_cast(itopk_size); + const uint32_t search_width_u32 = static_cast(search_width); + const uint32_t min_iterations_u32 = static_cast(min_iterations); + const uint32_t max_iterations_u32 = static_cast(max_iterations); + const unsigned num_random_samplings_u = static_cast(num_random_samplings); + // Launch the persistent kernel via AlgorithmLauncher // The persistent kernel now takes the descriptor pointer directly launcher->dispatch_cooperative( @@ -448,25 +464,25 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn job_descriptors_ptr, completion_counters_ptr, graph.data_handle(), - graph.extent(1), + graph_degree_u32, // Cast int64_t to uint32_t source_indices_ptr, - num_random_samplings, - rand_xor_mask, - nullptr, // seed_ptr + num_random_samplings_u, // Cast uint32_t to unsigned for consistency + rand_xor_mask, // uint64_t matches kernel (8 bytes) + nullptr, // seed_ptr num_seeds, hashmap_ptr, max_candidates, max_itopk, - itopk_size, - search_width, - min_iterations, - max_iterations, - nullptr, // num_executed_iterations - hash_bitlen, - small_hash_bitlen, - small_hash_reset_interval, - query_id_offset, // 
Offset to add to query_id when calling filter - dev_desc, // Pass descriptor pointer + itopk_size_u32, // Cast size_t to uint32_t + search_width_u32, // Cast size_t to uint32_t + min_iterations_u32, // Cast size_t to uint32_t + max_iterations_u32, // Cast size_t to uint32_t + nullptr, // num_executed_iterations + hash_bitlen_u32, // Cast int64_t to uint32_t + small_hash_bitlen_u32, // Cast size_t to uint32_t + small_hash_reset_interval_u32, // Cast size_t to uint32_t + query_id_offset, // Offset to add to query_id when calling filter + dev_desc, // Pass descriptor pointer bitset_ptr, bitset_len, original_nbits); @@ -574,6 +590,11 @@ void select_and_run_jit( SampleFilterT sample_filter, cudaStream_t stream) { + RAFT_LOG_INFO( + "[JIT LAUNCHER] Entering SINGLE_CTA launcher (persistent=%d, num_queries=%u, topk=%u)", + ps.persistent ? 1 : 0, + num_queries, + topk); const SourceIndexT* source_indices_ptr = source_indices.has_value() ? source_indices->data_handle() : nullptr; @@ -584,19 +605,34 @@ void select_and_run_jit( SourceIndexT original_nbits = 0; uint32_t query_id_offset = 0; - if constexpr (!std::is_same_v) { - // All non-none filters are wrapped in CagraSampleFilterWithQueryIdOffset - // Access .filter and .offset directly - query_id_offset = sample_filter.offset; + // Check if it has the wrapper members (CagraSampleFilterWithQueryIdOffset) + if constexpr (requires { + sample_filter.filter; + sample_filter.offset; + }) { using InnerFilter = decltype(sample_filter.filter); - if constexpr (std::is_same_v< - InnerFilter, - cuvs::neighbors::filtering::bitset_filter>) { + // Always extract offset for wrapped filters + query_id_offset = sample_filter.offset; + RAFT_LOG_INFO("Extracted query_id_offset: %u", query_id_offset); + if constexpr (is_bitset_filter::value) { + // Extract bitset data for bitset_filter (works for any bitset_filter instantiation) auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = 
static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); + RAFT_LOG_INFO("Extracted bitset data: bitset_ptr=%p, bitset_len=%zu, original_nbits=%zu", + bitset_ptr, + static_cast(bitset_len), + static_cast(original_nbits)); + RAFT_LOG_INFO("InnerFilter type: %s, bitset_view.size() type: %s, SourceIndexT: %s", + typeid(InnerFilter).name(), + typeid(decltype(bitset_view.size())).name(), + typeid(SourceIndexT).name()); + } else { + RAFT_LOG_INFO("InnerFilter is not bitset_filter, skipping bitset extraction"); } + } else { + RAFT_LOG_INFO("Filter does not have wrapper members (.filter/.offset), skipping extraction"); } // Use common logic to compute launch config @@ -713,6 +749,18 @@ void select_and_run_jit( // Get the device descriptor pointer - dev_ptr() initializes it if needed const auto* dev_desc = dataset_desc.dev_ptr(stream); + // Cast size_t/int64_t parameters to match kernel signature exactly + // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly + const uint32_t graph_degree_u32 = static_cast(graph.extent(1)); + const uint32_t hash_bitlen_u32 = static_cast(hash_bitlen); + const uint32_t small_hash_bitlen_u32 = static_cast(small_hash_bitlen); + const uint32_t small_hash_reset_interval_u32 = static_cast(small_hash_reset_interval); + const uint32_t itopk_size_u32 = static_cast(ps.itopk_size); + const uint32_t search_width_u32 = static_cast(ps.search_width); + const uint32_t min_iterations_u32 = static_cast(ps.min_iterations); + const uint32_t max_iterations_u32 = static_cast(ps.max_iterations); + const unsigned num_random_samplings_u = static_cast(ps.num_random_samplings); + dim3 grid(1, num_queries, 1); dim3 block(block_size, 1, 1); @@ -732,30 +780,71 @@ void select_and_run_jit( topk, queries_ptr, graph.data_handle(), - graph.extent(1), + graph_degree_u32, // Cast int64_t to uint32_t source_indices_ptr, - ps.num_random_samplings, - ps.rand_xor_mask, + num_random_samplings_u, // Cast uint32_t 
to unsigned for consistency + ps.rand_xor_mask, // uint64_t matches kernel (8 bytes) dev_seed_ptr, num_seeds, hashmap_ptr, max_candidates, max_itopk, - ps.itopk_size, // internal_topk - ps.search_width, - ps.min_iterations, - ps.max_iterations, + itopk_size_u32, // Cast size_t to uint32_t + search_width_u32, // Cast size_t to uint32_t + min_iterations_u32, // Cast size_t to uint32_t + max_iterations_u32, // Cast size_t to uint32_t num_executed_iterations, - hash_bitlen, - small_hash_bitlen, - small_hash_reset_interval, - query_id_offset, // Offset to add to query_id when calling filter + hash_bitlen_u32, // Cast int64_t to uint32_t + small_hash_bitlen_u32, // Cast size_t to uint32_t + small_hash_reset_interval_u32, // Cast size_t to uint32_t + query_id_offset, // Offset to add to query_id when calling filter dev_desc, // Pass base pointer - kernel expects concrete type but pointer value is same bitset_ptr, bitset_len, original_nbits); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + // Check for launch errors immediately + cudaError_t launch_err = cudaPeekAtLastError(); + if (launch_err != cudaSuccess) { + RAFT_LOG_ERROR("[JIT LAUNCHER] SINGLE_CTA kernel launch error detected: %s (error code: %d)", + cudaGetErrorString(launch_err), + launch_err); + RAFT_LOG_ERROR( + "[JIT LAUNCHER] SINGLE_CTA parameters: graph_degree=%u, itopk_size=%u, num_queries=%u, " + "topk=%u", + graph_degree_u32, + itopk_size_u32, + num_queries, + topk); + RAFT_CUDA_TRY(launch_err); + } + + // Synchronize to catch kernel execution errors before they propagate + // This ensures the kernel completes before we return, preventing parameter lifetime issues + cudaError_t sync_err = cudaStreamSynchronize(stream); + if (sync_err != cudaSuccess) { + RAFT_LOG_ERROR("[JIT LAUNCHER] SINGLE_CTA kernel execution failed: %s (error code: %d)", + cudaGetErrorString(sync_err), + sync_err); + RAFT_LOG_ERROR( + "[JIT LAUNCHER] SINGLE_CTA parameters: graph_degree=%u, itopk_size=%u, num_queries=%u, " + "topk=%u, 
search_width=%u", + graph_degree_u32, + itopk_size_u32, + num_queries, + topk, + search_width_u32); + RAFT_LOG_ERROR( + "[JIT LAUNCHER] SINGLE_CTA pointers: topk_indices=%p, topk_distances=%p, graph=%p, " + "dev_desc=%p", + reinterpret_cast(topk_indices_ptr), + topk_distances_ptr, + graph.data_handle(), + dev_desc); + RAFT_CUDA_TRY(sync_err); + } + + RAFT_LOG_INFO("[JIT LAUNCHER] SINGLE_CTA kernel completed successfully"); } } diff --git a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp index 4b4b81536e..99050b089b 100644 --- a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp +++ b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp @@ -48,6 +48,7 @@ template constexpr auto get_source_index_type_tag() { if constexpr (std::is_same_v) { return tag_idx_ui{}; } + if constexpr (std::is_same_v) { return tag_idx_l{}; } } // Helper trait to detect if a type is a bitset_filter (regardless of template parameters) @@ -71,8 +72,8 @@ std::string get_sample_filter_name(bool debug_output = false) // First check for none_sample_filter (the only unwrapped case) if constexpr (std::is_same_v) { - if (debug_output) { std::cerr << "[JIT] Returning: filter_none" << std::endl; } - return "filter_none"; + if (debug_output) { std::cerr << "[JIT] Returning: filter_none_ui" << std::endl; } + return "filter_none_ui"; } // All other filters are wrapped in CagraSampleFilterWithQueryIdOffset @@ -83,15 +84,17 @@ std::string get_sample_filter_name(bool debug_output = false) std::is_same_v> || std::is_same_v>) { if (debug_output) { - std::cerr << "[JIT] Returning: filter_bitset (via wrapped filter)" << std::endl; + std::cerr << "[JIT] Returning: filter_bitset_ui (via wrapped filter)" << std::endl; } - return "filter_bitset"; + return "filter_bitset_ui"; } } // Default to none filter for unknown types - if (debug_output) { std::cerr << "[JIT] Returning: filter_none (default/unknown)" << std::endl; } - return "filter_none"; + if 
(debug_output) { + std::cerr << "[JIT] Returning: filter_none_ui (default/unknown)" << std::endl; + } + return "filter_none_ui"; } } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter.cu.in b/cpp/src/neighbors/detail/jit_lto_kernels/filter.cu.in similarity index 74% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter.cu.in rename to cpp/src/neighbors/detail/jit_lto_kernels/filter.cu.in index a287f10f12..b2b5a221b6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter.cu.in +++ b/cpp/src/neighbors/detail/jit_lto_kernels/filter.cu.in @@ -7,14 +7,15 @@ #ifdef BUILD_KERNEL -#include +#include -namespace cuvs::neighbors::cagra::detail { +namespace @namespace@ { // Instantiate the sample_filter device function template +// CAGRA style: sample_filter(query_id, node_id, filter_data) template __device__ bool sample_filter<@source_index_type@>(uint32_t, @source_index_type@, void*); -} // namespace cuvs::neighbors::cagra::detail +} // namespace @namespace@ #else @@ -24,7 +25,7 @@ template __device__ bool sample_filter<@source_index_type@>(uint32_t, @source_in __attribute__((__constructor__)) static void register_@kernel_name_var@() { registerAlgorithm( - "sample_filter_@filter_name_var@", + "sample_filter_@kernel_name_var@", embedded_@kernel_name_var@, sizeof(embedded_@kernel_name_var@)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh b/cpp/src/neighbors/detail/jit_lto_kernels/filter_bitset.cuh similarity index 79% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh rename to cpp/src/neighbors/detail/jit_lto_kernels/filter_bitset.cuh index c40626afa7..415fae7075 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_bitset.cuh +++ b/cpp/src/neighbors/detail/jit_lto_kernels/filter_bitset.cuh @@ -7,7 +7,7 @@ #include "filter_data.h" -namespace cuvs::neighbors::cagra::detail { +namespace cuvs::neighbors::detail { // Inline 
implementation of bitset_view::test() to avoid including bitset.cuh // which transitively includes Thrust @@ -45,6 +45,10 @@ __device__ inline bool bitset_view_test(const bitset_t* bitset_ptr, return is_bit_set; } +// Unified sample_filter: takes query_id, node_id, and void* filter_data +// Used by both CAGRA and IVF Flat +// For IVF Flat: node_id should be computed from (cluster_ix, sample_ix) using inds_ptrs from +// filter_data template __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* filter_data) { @@ -61,13 +65,13 @@ __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* fil // Directly test the bitset without needing bitset_filter wrapper // bitset_view_test returns true if the bit is set (node_id is in the bitset) - // For a bitset created from removed_indices, if the bit is set, the node should be filtered out - // So we return the inverse: if the bit is set, return false to reject the node + // The bitset marks allowed indices (same as non-JIT bitset_filter which returns test() directly) + // Return true if the bit is set (node is allowed), false if not set (node should be filtered out) bool is_in_bitset = bitset_view_test( bitset_data->bitset_ptr, bitset_data->bitset_len, bitset_data->original_nbits, node_id); - // If node_id is in the bitset (removed set), return false to reject it - // If node_id is not in the bitset, return true to allow it - return !is_in_bitset; + // If node_id is in the bitset (allowed), return true to allow it + // If node_id is not in the bitset, return false to reject it + return is_in_bitset; } -} // namespace cuvs::neighbors::cagra::detail +} // namespace cuvs::neighbors::detail diff --git a/cpp/src/neighbors/detail/jit_lto_kernels/filter_data.h b/cpp/src/neighbors/detail/jit_lto_kernels/filter_data.h new file mode 100644 index 0000000000..9fc4336872 --- /dev/null +++ b/cpp/src/neighbors/detail/jit_lto_kernels/filter_data.h @@ -0,0 +1,28 @@ +/* + * SPDX-FileCopyrightText: 
Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include + +namespace cuvs::neighbors::detail { + +// Structure to hold bitset filter data +// This is passed as void* to the extern sample_filter function +// Used by both CAGRA and IVF Flat +template +struct bitset_filter_data_t { + uint32_t* bitset_ptr; // Pointer to bitset data in global memory + SourceIndexT bitset_len; // Length of bitset array + SourceIndexT original_nbits; // Original number of bits + + __device__ bitset_filter_data_t(uint32_t* ptr, SourceIndexT len, SourceIndexT nbits) + : bitset_ptr(ptr), bitset_len(len), original_nbits(nbits) + { + } +}; + +} // namespace cuvs::neighbors::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_none.cuh b/cpp/src/neighbors/detail/jit_lto_kernels/filter_none.cuh similarity index 58% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_none.cuh rename to cpp/src/neighbors/detail/jit_lto_kernels/filter_none.cuh index f4eab464c6..e3ca5496c1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_none.cuh +++ b/cpp/src/neighbors/detail/jit_lto_kernels/filter_none.cuh @@ -7,12 +7,10 @@ #include -// Note: We don't include sample_filter.cuh here because it's not needed for JIT -// The JIT version defines its own sample_filter function directly -// #include "../../../sample_filter.cuh" - -namespace cuvs::neighbors::cagra::detail { +namespace cuvs::neighbors::detail { +// Unified sample_filter: takes query_id, node_id, and void* filter_data +// Used by both CAGRA and IVF Flat template __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* filter_data) { @@ -21,4 +19,4 @@ __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* fil return true; } -} // namespace cuvs::neighbors::cagra::detail +} // namespace cuvs::neighbors::detail diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh 
b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index 8dc735f03e..aba8f4e560 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -5,6 +5,7 @@ #pragma once +#include "../detail/jit_lto_kernels/filter_data.h" #include "../ivf_common.cuh" #include "jit_lto_kernels/interleaved_scan_planner.hpp" #include @@ -96,10 +97,10 @@ template constexpr auto get_filter_name() { if constexpr (std::is_same_v>) { - return "filter_none"; + return "filter_none_l"; } if constexpr (std::is_same_v>) { - return "filter_bitset"; + return "filter_bitset_l"; } } @@ -227,6 +228,9 @@ void launch_kernel(const index& index, return; } + // Pass individual filter parameters like CAGRA does + // The kernel will construct filter_data struct internally when needed + for (uint32_t query_offset = 0; query_offset < num_queries; query_offset += kMaxGridY) { uint32_t grid_dim_y = std::min(kMaxGridY, num_queries - query_offset); dim3 grid_dim(grid_dim_x, grid_dim_y, 1); @@ -254,7 +258,6 @@ void launch_kernel(const index& index, max_samples, chunk_indices, index.dim(), - // sample_filter, inds_ptrs, bitset_ptr.value_or(nullptr), bitset_len.value_or(0), diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh deleted file mode 100644 index 49cd89d08a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh +++ /dev/null @@ -1,64 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
- * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include - -namespace cuvs::neighbors::ivf_flat::detail { - -// Inline implementation of bitset_view::test() to avoid including bitset.cuh -// which transitively includes Thrust -template -__device__ inline bool bitset_view_test(const bitset_t* bitset_ptr, - index_t bitset_len, - index_t original_nbits, - index_t sample_index) -{ - constexpr index_t bitset_element_size = sizeof(bitset_t) * 8; - const index_t nbits = sizeof(bitset_t) * 8; - index_t bit_index = 0; - index_t bit_offset = 0; - - if (original_nbits == 0 || nbits == original_nbits) { - bit_index = sample_index / bitset_element_size; - bit_offset = sample_index % bitset_element_size; - } else { - // Handle original_nbits != nbits case - const index_t original_bit_index = sample_index / original_nbits; - const index_t original_bit_offset = sample_index % original_nbits; - bit_index = original_bit_index * original_nbits / nbits; - bit_offset = 0; - if (original_nbits > nbits) { - bit_index += original_bit_offset / nbits; - bit_offset = original_bit_offset % nbits; - } else { - index_t ratio = nbits / original_nbits; - bit_offset += (original_bit_index % ratio) * original_nbits; - bit_offset += original_bit_offset % nbits; - } - } - const bitset_t bit_element = bitset_ptr[bit_index]; - const bool is_bit_set = (bit_element & (bitset_t{1} << bit_offset)) != 0; - return is_bit_set; -} - -template -__device__ bool sample_filter(index_t* const* const inds_ptrs, - const uint32_t query_ix, - const uint32_t cluster_ix, - const uint32_t sample_ix, - uint32_t* bitset_ptr, - index_t bitset_len, - index_t original_nbits) -{ - // Convert cluster_ix and sample_ix to a single sample index using inds_ptrs - const index_t sample_index = inds_ptrs[cluster_ix][sample_ix]; - - // Directly test the bitset without needing bitset_filter or ivf_to_sample_filter wrappers - return bitset_view_test(bitset_ptr, bitset_len, original_nbits, sample_index); -} - -} // 
namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh deleted file mode 100644 index 90a124688a..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh +++ /dev/null @@ -1,24 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include - -namespace cuvs::neighbors::ivf_flat::detail { - -template -__device__ constexpr bool sample_filter(index_t* const* const inds_ptrs, - const uint32_t query_ix, - const uint32_t cluster_ix, - const uint32_t sample_ix, - uint32_t* bitset_ptr, - index_t bitset_len, - index_t original_nbits) -{ - return true; -} - -} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in index 5e75253939..4ca8e88fd6 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in @@ -12,6 +12,7 @@ namespace cuvs::neighbors::ivf_flat::detail { // Instantiate the kernel template +// Pass individual filter parameters like CAGRA does template __global__ void interleaved_scan_kernel<@capacity@, @veclen@, @ascending@, @compute_norm@, @data_type@, @acc_type@, @idx_type@>( const uint32_t, const @data_type@*, const uint32_t*, const @data_type@* const*, const uint32_t*, const uint32_t, const uint32_t, const uint32_t, const uint32_t, const uint32_t*, const uint32_t, diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp index 792c64f39a..da256d2218 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp +++ 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_planner.hpp @@ -33,7 +33,7 @@ struct InterleavedScanPlanner : AlgorithmPlanner { void add_filter_device_function(std::string filter_name) { - auto key = filter_name; + auto key = "sample_filter_" + filter_name; this->device_functions.push_back(key); } diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh index d32f29b859..c5ff7b35f3 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/ivf_flat_interleaved_scan_kernel.cuh @@ -5,6 +5,8 @@ #pragma once +#include "../../detail/cagra/jit_lto_kernels/extern_device_functions.cuh" +#include "../../detail/jit_lto_kernels/filter_data.h" #include #include #include @@ -34,14 +36,10 @@ static constexpr int kThreadsPerBlock = 128; template extern __device__ void compute_dist(AccT& acc, AccT x, AccT y); -template -extern __device__ bool sample_filter(index_t* const* const inds_ptrs, - const uint32_t query_ix, - const uint32_t cluster_ix, - const uint32_t sample_ix, - uint32_t* bitset_ptr, - index_t bitset_len, - index_t original_nbits); +// Unified sample_filter interface: takes query_id, node_id, and void* filter_data +// For IVF Flat: node_id should be computed from (cluster_ix, sample_ix) using inds_ptrs from +// filter_data sample_filter is declared in extern_device_functions.cuh (shared with CAGRA) +using cuvs::neighbors::detail::sample_filter; template extern __device__ T post_process(T val); @@ -784,10 +782,11 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) const uint32_t max_samples, const uint32_t* chunk_indices, const uint32_t dim, - IdxT* const* const inds_ptrs, - uint32_t* bitset_ptr, - IdxT bitset_len, - IdxT original_nbits, + IdxT* const* const inds_ptrs, // Always needed for IVF Flat to convert + // (list_id, vec_id) to node_id + uint32_t* 
bitset_ptr, // Bitset data pointer (nullptr for none_filter) + IdxT bitset_len, // Bitset length + IdxT original_nbits, // Original number of bits uint32_t* neighbors, float* distances) { @@ -859,13 +858,15 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) // This is the vector a given lane/thread handles const uint32_t vec_id = group_id * raft::WarpSize + lane_id; - const bool valid = vec_id < list_length && sample_filter(inds_ptrs, - queries_offset + blockIdx.y, - list_id, - vec_id, - bitset_ptr, - bitset_len, - original_nbits); + // For IVF Flat, convert (list_id, vec_id) to node_id using inds_ptrs + const IdxT node_id = inds_ptrs[list_id][vec_id]; + // Construct filter_data struct (bitset data is in global memory) + cuvs::neighbors::detail::bitset_filter_data_t filter_data( + bitset_ptr, bitset_len, original_nbits); + const bool valid = + vec_id < list_length && + sample_filter( + queries_offset + blockIdx.y, node_id, bitset_ptr != nullptr ? &filter_data : nullptr); if (valid) { // Process first shm_assisted_dim dimensions (always using shared memory) From 5239a1a3204b06df13f90a7872e9119c15053719 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 18 Feb 2026 18:32:49 +0000 Subject: [PATCH 107/158] fix compile --- .../neighbors/detail/cagra/search_multi_kernel.cuh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index 512d17896d..7ae4d11cd5 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -930,7 +930,7 @@ struct search if (iter + 1 >= min_iterations) { if (terminate_flag.data() == nullptr) { - RAFT_FAIL("terminate_flag.data() is NULL before set_value at iteration %zu", iter + 1); + RAFT_FAIL("terminate_flag.data() is NULL before set_value at iteration %u", iter + 1); } set_value(terminate_flag.data(), 1, stream); } @@ -941,18 +941,18 @@ 
struct search // Validate all pointers before passing to kernel to prevent memory corruption if (terminate_flag.data() == nullptr) { - RAFT_FAIL("terminate_flag.data() is NULL before pickup_next_parents at iteration %zu", + RAFT_FAIL("terminate_flag.data() is NULL before pickup_next_parents at iteration %u", iter + 1); } if (result_indices.data() == nullptr) { - RAFT_FAIL("result_indices.data() is NULL before pickup_next_parents at iteration %zu", + RAFT_FAIL("result_indices.data() is NULL before pickup_next_parents at iteration %u", iter + 1); } if (hashmap.data() == nullptr) { - RAFT_FAIL("hashmap.data() is NULL before pickup_next_parents at iteration %zu", iter + 1); + RAFT_FAIL("hashmap.data() is NULL before pickup_next_parents at iteration %u", iter + 1); } if (parent_node_list.data() == nullptr) { - RAFT_FAIL("parent_node_list.data() is NULL before pickup_next_parents at iteration %zu", + RAFT_FAIL("parent_node_list.data() is NULL before pickup_next_parents at iteration %u", iter + 1); } @@ -972,7 +972,7 @@ struct search // termination (2) if (iter + 1 >= min_iterations) { if (terminate_flag.data() == nullptr) { - RAFT_FAIL("terminate_flag.data() is NULL at iteration %zu", iter + 1); + RAFT_FAIL("terminate_flag.data() is NULL at iteration %u", iter + 1); } if (get_value(terminate_flag.data(), stream)) { iter++; From 736dc75f67e31c8ac60077932eba4d7af7a84dae Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 18 Feb 2026 13:51:03 -0600 Subject: [PATCH 108/158] Ignore cache-host run exports --- conda/recipes/libcuvs/recipe.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/conda/recipes/libcuvs/recipe.yaml b/conda/recipes/libcuvs/recipe.yaml index 5d12000ded..a916dbde8e 100644 --- a/conda/recipes/libcuvs/recipe.yaml +++ b/conda/recipes/libcuvs/recipe.yaml @@ -356,6 +356,10 @@ outputs: - librmm - mkl - nccl + - if: cuda_major == "13" + then: + - cuda-nvrtc + - libnvjitlink about: homepage: ${{ 
load_from_file("python/libcuvs/pyproject.toml").project.urls.Homepage }} license: ${{ load_from_file("python/libcuvs/pyproject.toml").project.license }} @@ -539,6 +543,10 @@ outputs: - librmm - mkl - nccl + - if: cuda_major == "13" + then: + - cuda-nvrtc + - libnvjitlink about: homepage: ${{ load_from_file("python/cuvs_bench/pyproject.toml").project.urls.Homepage }} license: ${{ load_from_file("python/cuvs_bench/pyproject.toml").project.license }} From a7a4ef76cc51832f1bb929f3eea40845c09aeaf9 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 19 Feb 2026 02:46:09 +0000 Subject: [PATCH 109/158] pull out metric --- .../modules/generate_jit_lto_kernels.cmake | 689 +++++++++--------- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 5 +- .../cagra/compute_distance_standard-impl.cuh | 124 +++- .../cagra/compute_distance_vpq-impl.cuh | 20 +- .../apply_normalization_standard_cosine.cu.in | 42 ++ ...ply_normalization_standard_cosine_impl.cuh | 35 + .../apply_normalization_standard_noop.cu.in | 42 ++ ...apply_normalization_standard_noop_impl.cuh | 29 + .../compute_distance_standard.cu.in | 14 +- .../compute_distance_standard_impl.cuh | 6 +- ...mpute_distance_to_child_nodes_kernel.cu.in | 11 +- ...e_distance_to_child_nodes_kernel_vpq.cu.in | 11 +- .../compute_distance_vpq.cu.in | 15 +- .../compute_distance_vpq_impl.cuh | 8 +- .../jit_lto_kernels/device_common_jit.cuh | 24 +- .../cagra/jit_lto_kernels/dist_op.cu.in | 37 + .../jit_lto_kernels/dist_op_cosine_impl.cuh | 19 + .../jit_lto_kernels/dist_op_hamming_impl.cuh | 22 + .../dist_op_inner_product_impl.cuh | 19 + .../cagra/jit_lto_kernels/dist_op_l2_impl.cuh | 20 + .../extern_device_functions.cuh | 57 +- .../random_pickup_kernel.cu.in | 11 +- .../random_pickup_kernel_vpq.cu.in | 11 +- .../search_multi_cta_kernel.cu.in | 11 +- .../search_multi_cta_kernel_jit.cuh | 11 +- .../search_multi_cta_kernel_vpq.cu.in | 11 +- .../search_multi_cta_planner.hpp | 79 +- .../search_multi_kernel_jit.cuh | 50 +- 
.../search_multi_kernel_planner.hpp | 77 +- .../search_single_cta_kernel.cu.in | 11 +- .../search_single_cta_kernel_jit.cuh | 15 +- .../search_single_cta_kernel_p.cu.in | 11 +- .../search_single_cta_kernel_p_vpq.cu.in | 11 +- .../search_single_cta_kernel_vpq.cu.in | 11 +- .../search_single_cta_planner.hpp | 85 ++- .../setup_workspace_standard.cu.in | 13 +- .../setup_workspace_standard_impl.cuh | 12 +- .../jit_lto_kernels/setup_workspace_vpq.cu.in | 13 +- .../setup_workspace_vpq_impl.cuh | 14 +- .../detail/cagra/search_multi_kernel.cuh | 12 +- 40 files changed, 1097 insertions(+), 621 deletions(-) create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 3e5a8776d2..eb050c4de1 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -240,106 +240,175 @@ function(generate_jit_lto_kernels target) set(cagra_source_index_types "uint32_t") set(cagra_source_index_abbrevs "ui") - # Generate standard descriptor fragments + # Generate setup_workspace_standard fragments (one per team_size, 
dataset_block_dim, data_type, + # index_type, distance_type) Note: Metric is no longer a template parameter - it's linked via + # dist_op and normalization fragments foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) - foreach(metric_idx IN ITEMS 0 1 2 3) - list(GET cagra_metrics ${metric_idx} metric) - list(GET cagra_metric_abbrevs ${metric_idx} metric_name) - # Map metric abbreviation to full name used by planner's metric_to_string() - if(metric_name STREQUAL "l2") - set(metric_name_full "L2Expanded") - set(metric_tag "l2") - elseif(metric_name STREQUAL "ip") - set(metric_name_full "InnerProduct") - set(metric_tag "inner_product") - elseif(metric_name STREQUAL "cos") - set(metric_name_full "CosineExpanded") - set(metric_tag "cosine") - elseif(metric_name STREQUAL "hamming") - set(metric_name_full "BitwiseHamming") - set(metric_tag "hamming") - # BitwiseHamming is only supported for uint8_t (data_idx=2) - if(NOT data_idx EQUAL 2) - continue() - endif() - else() - set(metric_name_full "${metric_name}") - set(metric_tag "${metric_name}") + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # setup_workspace_standard (no metric in name) + set(kernel_name + "setup_workspace_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + 
EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + + # Generate compute_distance_standard fragments (without metric - metric is handled via dist_op + # fragments) + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # compute_distance_standard (no metric parameter - uses JIT-linked dist_op) + set(kernel_name + "compute_distance_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + + # Generate dist_op fragments for each metric Note: InnerProduct and CosineExpanded both use + # inner_product dist_op, so we only generate it once + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + # Generate dist_op fragments for unique metric tags only l2, inner_product (used by both ip and + # cos), hamming + set(dist_op_tags "l2" 
"inner_product" "hamming") + foreach(metric_tag IN LISTS dist_op_tags) + # Skip hamming for non-uint8_t types + if(metric_tag STREQUAL "hamming" AND NOT data_idx EQUAL 2) + continue() endif() - foreach(team_size IN LISTS cagra_team_sizes) - foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - # setup_workspace_standard - set(kernel_name - "setup_workspace_standard_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - set(metric_cpp "cuvs::distance::DistanceType::${metric}") - set(metric_name "${metric_name_full}") - set(data_type "${data_type}") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) + # Generate dist_op fragment for this metric tag Note: dist_op only needs DataT and DistanceT, + # not IndexT + set(kernel_name "dist_op_${metric_tag}_${type_abbrev}_${cagra_distance_abbrev}") + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(metric_tag "${metric_tag}") + set(data_type "${data_type}") + set(distance_type "${cagra_distance_type}") + set(type_abbrev "${type_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in" + "${filename}" @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE 
"${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() - # compute_distance_standard - set(kernel_name - "compute_distance_standard_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - set(metric_cpp "cuvs::distance::DistanceType::${metric}") - set(metric_name "${metric_name_full}") - set(data_type "${data_type}") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - endforeach() + # Generate normalization fragments (no-op and cosine) These are used to normalize distances for + # CosineExpanded metric + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # No-op normalization fragment (for non-CosineExpanded metrics) + set(kernel_name + "apply_normalization_standard_noop_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(data_type "${data_type}") + set(index_type 
"${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # Cosine normalization fragment (for CosineExpanded metric) + set(kernel_name + "apply_normalization_standard_cosine_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) endforeach() endforeach() endforeach() - # Generate VPQ descriptor fragments (for L2Expanded and all data types: float, half, int8_t, - # uint8_t) + # Generate VPQ descriptor fragments NOTE: VPQ is ONLY supported with L2Expanded metric (not + # InnerProduct, CosineExpanded, etc.) 
Generating for all data types: float, half, int8_t, uint8_t foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) foreach(pq_len IN LISTS cagra_pq_lens) - # setup_workspace_vpq + # setup_workspace_vpq Note: Metric is no longer in the kernel name - VPQ only supports + # L2Expanded set(kernel_name - "setup_workspace_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + "setup_workspace_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") - set(metric_name "L2Expanded") - set(metric_tag "l2") + # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore set(pq_bits "${cagra_pq_bits}") set(codebook_type "${cagra_codebook_type}") set(data_type "${data_type}") @@ -360,14 +429,13 @@ function(generate_jit_lto_kernels target) EMBEDDED_ARRAY "embedded_${kernel_name}" ) - # compute_distance_vpq + # compute_distance_vpq Note: Metric is no longer in the kernel name - VPQ only supports + # L2Expanded set(kernel_name - "compute_distance_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + "compute_distance_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") - set(metric_name "L2Expanded") - 
set(metric_tag "l2") + # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore set(pq_bits "${cagra_pq_bits}") set(codebook_type "${cagra_codebook_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -401,113 +469,85 @@ function(generate_jit_lto_kernels target) # For kernel instantiation, we need to provide template parameters The actual # metric/team_size/dataset_block_dim used at runtime are determined via device functions We use - # default values for the template instantiation - these don't affect runtime behavior + # default values for the template instantiation - these don't affect runtime behavior Note: Metric + # is no longer in the kernel name - it's linked via dist_op and normalization fragments foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) - foreach(metric_idx IN ITEMS 0 1 2 3) - list(GET cagra_metrics ${metric_idx} metric) - list(GET cagra_metric_abbrevs ${metric_idx} metric_name) - # Map metric abbreviation to full name used by planner's metric_to_string() - if(metric_name STREQUAL "l2") - set(metric_name_full "L2Expanded") - set(metric_tag "l2") - elseif(metric_name STREQUAL "ip") - set(metric_name_full "InnerProduct") - set(metric_tag "inner_product") - elseif(metric_name STREQUAL "cos") - set(metric_name_full "CosineExpanded") - set(metric_tag "cosine") - elseif(metric_name STREQUAL "hamming") - set(metric_name_full "BitwiseHamming") - set(metric_tag "hamming") - # BitwiseHamming is only supported for uint8_t (data_idx=2) - if(NOT data_idx EQUAL 2) - continue() - endif() - else() - set(metric_name_full "${metric_name}") - set(metric_tag "${metric_name}") - endif() - foreach(topk_idx IN ITEMS 0 1) - list(GET cagra_topk_by_bitonic_sort_options ${topk_idx} topk_by_bitonic_sort) - list(GET cagra_topk_by_bitonic_sort_str_options ${topk_idx} topk_by_bitonic_sort_str) - foreach(merge_idx IN ITEMS 0 1) - list(GET 
cagra_bitonic_sort_and_merge_multi_warps_options ${merge_idx} - bitonic_sort_and_merge_multi_warps - ) - list(GET cagra_bitonic_sort_and_merge_multi_warps_str_options ${merge_idx} - bitonic_sort_and_merge_multi_warps_str - ) - foreach(team_size IN LISTS cagra_team_sizes) - foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - # CAGRA only uses uint32_t as SourceIndexT - set(source_index_type "uint32_t") - set(src_idx_abbrev "ui") - # Regular kernel entrypoint - set(kernel_name - "search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" - ) - set(filename - "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" - ) - set(metric "${metric}") - set(metric_cpp "cuvs::distance::DistanceType::${metric}") - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) + foreach(topk_idx IN ITEMS 0 1) + list(GET cagra_topk_by_bitonic_sort_options ${topk_idx} topk_by_bitonic_sort) + list(GET cagra_topk_by_bitonic_sort_str_options ${topk_idx} topk_by_bitonic_sort_str) + foreach(merge_idx IN ITEMS 0 1) + list(GET cagra_bitonic_sort_and_merge_multi_warps_options ${merge_idx} + bitonic_sort_and_merge_multi_warps + ) + list(GET cagra_bitonic_sort_and_merge_multi_warps_str_options 
${merge_idx} + bitonic_sort_and_merge_multi_warps_str + ) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") + # Regular kernel entrypoint (no metric in name) + set(kernel_name + "search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + ) + set(filename + "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" + ) + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) - # Persistent kernel entrypoint - set(kernel_name - "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" - ) - set(filename - "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" - ) - set(metric "${metric}") - set(metric_cpp "cuvs::distance::DistanceType::${metric}") - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev 
"${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - endforeach() # dataset_block_dim - endforeach() # team_size - endforeach() # merge_idx - endforeach() # topk_idx - endforeach() # metric_idx + # Persistent kernel entrypoint (no metric in name) + set(kernel_name + "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + ) + set(filename + "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" + ) + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() # dataset_block_dim + endforeach() # team_size + endforeach() # merge_idx + endforeach() # topk_idx endforeach() # data_idx - # Generate single_cta VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to - # team_size and dataset_block_dim + # Generate single_cta VPQ kernel 
entrypoints NOTE: VPQ is ONLY supported with L2Expanded metric + # VPQ kernels need pq_bits and pq_len in addition to team_size and dataset_block_dim foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) @@ -527,16 +567,16 @@ function(generate_jit_lto_kernels target) # CAGRA only uses uint32_t as SourceIndexT set(source_index_type "uint32_t") set(src_idx_abbrev "ui") - # Regular VPQ kernel entrypoint + # Regular VPQ kernel entrypoint Note: Metric is no longer in the kernel name - VPQ + # only supports L2Expanded set(kernel_name - "search_single_cta_kernel_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "search_single_cta_kernel_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" ) - set(metric "L2Expanded") - set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") - set(metric_name_full "L2Expanded") + # VPQ only supports L2Expanded, but we don't need to pass metric to the template + # anymore set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") set(pq_bits "${cagra_pq_bits}") @@ -559,16 +599,16 @@ function(generate_jit_lto_kernels target) EMBEDDED_ARRAY "embedded_${kernel_name}" ) - # Persistent VPQ kernel entrypoint + # Persistent VPQ kernel entrypoint Note: Metric is no longer in the kernel name - VPQ + # only supports L2Expanded set(kernel_name - 
"search_single_cta_kernel_p_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "search_single_cta_kernel_p_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" ) - set(metric "L2Expanded") - set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") - set(metric_name_full "L2Expanded") + # VPQ only supports L2Expanded, but we don't need to pass metric to the template + # anymore set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") set(pq_bits "${cagra_pq_bits}") @@ -607,65 +647,40 @@ function(generate_jit_lto_kernels target) foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) - foreach(metric_idx IN ITEMS 0 1 2 3) - list(GET cagra_metrics ${metric_idx} metric) - list(GET cagra_metric_abbrevs ${metric_idx} metric_name) - if(metric_name STREQUAL "l2") - set(metric_name_full "L2Expanded") - set(metric_tag "l2") - elseif(metric_name STREQUAL "ip") - set(metric_name_full "InnerProduct") - set(metric_tag "inner_product") - elseif(metric_name STREQUAL "cos") - set(metric_name_full "CosineExpanded") - set(metric_tag "cosine") - elseif(metric_name STREQUAL "hamming") - set(metric_name_full "BitwiseHamming") - set(metric_tag "hamming") - # BitwiseHamming is only supported for uint8_t (data_idx=2) - if(NOT data_idx EQUAL 2) - continue() - endif() - else() - set(metric_name_full "${metric_name}") - set(metric_tag "${metric_name}") - endif() - foreach(team_size IN LISTS cagra_team_sizes) - foreach(dataset_block_dim IN 
LISTS cagra_dataset_block_dims) - # CAGRA only uses uint32_t as SourceIndexT - set(source_index_type "uint32_t") - set(src_idx_abbrev "ui") - # Multi_cta kernel entrypoint - set(kernel_name - "search_multi_cta_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") - set(metric "${metric}") - set(metric_cpp "cuvs::distance::DistanceType::${metric_name_full}") - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - endforeach() # dataset_block_dim - endforeach() # team_size - endforeach() # metric_idx + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") + # Multi_cta kernel entrypoint (no metric in name) + set(kernel_name + "search_multi_cta_kernel_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") 
+ set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() # dataset_block_dim + endforeach() # team_size endforeach() # data_idx - # Generate multi_cta VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to + # Generate multi_cta VPQ kernel entrypoints NOTE: VPQ is ONLY supported with L2Expanded metric + # (not InnerProduct, CosineExpanded, etc.) VPQ kernels need pq_bits and pq_len in addition to # team_size and dataset_block_dim VPQ is supported for all data types (float, half, int8_t, # uint8_t) CAGRA only uses uint32_t as SourceIndexT foreach(data_idx IN ITEMS 0 1 2 3) @@ -677,14 +692,13 @@ function(generate_jit_lto_kernels target) # CAGRA only uses uint32_t as SourceIndexT set(source_index_type "uint32_t") set(src_idx_abbrev "ui") - # Multi_cta VPQ kernel entrypoint + # Multi_cta VPQ kernel entrypoint Note: Metric is no longer in the kernel name - VPQ only + # supports L2Expanded set(kernel_name - "search_multi_cta_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "search_multi_cta_kernel_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") - set(metric "L2Expanded") - set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") - set(metric_name_full "L2Expanded") + # VPQ only 
supports L2Expanded, but we don't need to pass metric to the template anymore set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") set(pq_bits "${cagra_pq_bits}") @@ -716,90 +730,61 @@ function(generate_jit_lto_kernels target) foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) - foreach(metric_idx IN ITEMS 0 1 2 3) - list(GET cagra_metrics ${metric_idx} metric) - list(GET cagra_metric_abbrevs ${metric_idx} metric_name) - if(metric_name STREQUAL "l2") - set(metric_name_full "L2Expanded") - set(metric_tag "l2") - elseif(metric_name STREQUAL "ip") - set(metric_name_full "InnerProduct") - set(metric_tag "inner_product") - elseif(metric_name STREQUAL "cos") - set(metric_name_full "CosineExpanded") - set(metric_tag "cosine") - elseif(metric_name STREQUAL "hamming") - set(metric_name_full "BitwiseHamming") - set(metric_tag "hamming") - # BitwiseHamming is only supported for uint8_t (data_idx=2) - if(NOT data_idx EQUAL 2) - continue() - endif() - else() - set(metric_name_full "${metric_name}") - set(metric_tag "${metric_name}") - endif() - - foreach(team_size IN LISTS cagra_team_sizes) - foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - # random_pickup_kernel entrypoint - generate for each combination Note: - # random_pickup_kernel doesn't use SourceIndexT, so no loop needed - set(kernel_name - "random_pickup_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") - set(metric "${metric}") - set(metric_cpp "cuvs::distance::DistanceType::${metric_name_full}") - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev 
"${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # random_pickup_kernel entrypoint (no metric in name) Note: random_pickup_kernel doesn't use + # SourceIndexT, so no loop needed + set(kernel_name + "random_pickup_kernel_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) - # CAGRA only uses uint32_t as SourceIndexT - set(source_index_type "uint32_t") - set(src_idx_abbrev "ui") - # compute_distance_to_child_nodes_kernel entrypoint - set(kernel_name - "compute_distance_to_child_nodes_kernel_${metric_name_full}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" - ) - set(filename 
"${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") - set(metric "${metric}") - set(metric_cpp "cuvs::distance::DistanceType::${metric_name_full}") - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - endforeach() # dataset_block_dim - endforeach() # team_size - endforeach() # metric_idx + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") + # compute_distance_to_child_nodes_kernel entrypoint (no metric in name) + set(kernel_name + "compute_distance_to_child_nodes_kernel_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER 
"${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() # dataset_block_dim + endforeach() # team_size endforeach() # data_idx # Generate multi_kernel VPQ kernel entrypoints VPQ kernels need pq_bits and pq_len in addition to @@ -811,14 +796,13 @@ function(generate_jit_lto_kernels target) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) foreach(pq_len IN LISTS cagra_pq_lens) - # random_pickup_kernel VPQ entrypoint + # random_pickup_kernel VPQ entrypoint Note: Metric is no longer in the kernel name - VPQ + # only supports L2Expanded set(kernel_name - "random_pickup_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + "random_pickup_kernel_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") - set(metric "L2Expanded") - set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") - set(metric_name_full "L2Expanded") + # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") set(pq_bits "${cagra_pq_bits}") @@ -844,14 +828,13 @@ function(generate_jit_lto_kernels target) # CAGRA only uses uint32_t as SourceIndexT set(source_index_type "uint32_t") set(src_idx_abbrev "ui") - # compute_distance_to_child_nodes_kernel VPQ entrypoint + # compute_distance_to_child_nodes_kernel VPQ entrypoint Note: Metric is no longer in the + # kernel name - VPQ only supports L2Expanded set(kernel_name - 
"compute_distance_to_child_nodes_kernel_vpq_L2Expanded_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "compute_distance_to_child_nodes_kernel_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") - set(metric "L2Expanded") - set(metric_cpp "cuvs::distance::DistanceType::L2Expanded") - set(metric_name_full "L2Expanded") + # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") set(pq_bits "${cagra_pq_bits}") diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 7942460f52..63789385d2 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -153,8 +153,9 @@ std::shared_ptr AlgorithmPlanner::build() // Load the generated LTO IR and link them together nvJitLinkHandle handle; - const char* lopts[] = {"-lto", archs.c_str()}; - auto result = nvJitLinkCreate(&handle, 2, lopts); + const char* lopts[] = { + "-lto", "-split-compile=0", "-split-compile-extended=0", "-maxrregcount=64", archs.c_str()}; + auto result = nvJitLinkCreate(&handle, 5, lopts); check_nvjitlink_result(handle, result); for (auto& frag : this->fragments) { diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh index 3e3c83f5c0..281f6d98bb 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh @@ -13,7 +13,37 @@ #include namespace cuvs::neighbors::cagra::detail { + +#if defined(CUVS_ENABLE_JIT_LTO) || 
defined(BUILD_KERNEL) + +// When JIT LTO is enabled or building kernel fragments, dist_op is an extern function that gets JIT +// linked from fragments Each fragment provides a metric-specific implementation (L2Expanded, +// InnerProduct, etc.) The planner will link the appropriate fragment based on the metric Note: +// extern functions cannot be constexpr, so we remove constexpr here Note: These are in the detail +// namespace (not anonymous) so they can be found by JIT linking +template +extern __device__ DISTANCE_T dist_op(DATA_T a, DATA_T b); + +// Normalization is also JIT linked from fragments (no-op for most metrics, cosine normalization for +// CosineExpanded) The planner will link the appropriate fragment (cosine or noop) based on the +// metric +template +extern __device__ DistanceT apply_normalization_standard( + DistanceT distance, + const typename dataset_descriptor_base_t::args_t args, + IndexT dataset_index); + +#endif + namespace { + +#if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) + +// When JIT LTO is disabled, dist_op is a template function with Metric as a template parameter template RAFT_DEVICE_INLINE_FUNCTION constexpr auto dist_op(DATA_T a, DATA_T b) -> std::enable_if_t @@ -41,18 +71,31 @@ RAFT_DEVICE_INLINE_FUNCTION constexpr auto dist_op(DATA_T a, DATA_T b) const auto v = (a ^ b) & 0xffu; return __popc(v); } + +#endif // #if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) } // namespace -template + typename DistanceT +#if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) + , + cuvs::distance::DistanceType Metric +#endif + > struct standard_dataset_descriptor_t : public dataset_descriptor_base_t { using base_type = dataset_descriptor_base_t; - using QUERY_T = typename std:: +#if defined(CUVS_ENABLE_JIT_LTO) || defined(BUILD_KERNEL) + // When JIT LTO is enabled or building kernel fragments, Metric is not a template parameter + // QUERY_T is always float (BitwiseHamming uses uint8_t as DataT, but query is still float) 
+ using QUERY_T = float; +#else + // When JIT LTO is disabled, Metric is a template parameter + using QUERY_T = typename std:: conditional_t; +#endif using base_type::args; using base_type::smem_ws_size_in_bytes; using typename base_type::args_t; @@ -62,7 +105,9 @@ struct standard_dataset_descriptor_t : public dataset_descriptor_base_t(k + v)); +#if defined(CUVS_ENABLE_JIT_LTO) || defined(BUILD_KERNEL) + // When JIT LTO is enabled or building kernel fragments, dist_op is an extern function (no + // template parameters) + r += dist_op( + d, cuvs::spatial::knn::detail::utils::mapping{}(data[e][v])); +#else + // When JIT LTO is disabled, dist_op is a template function with Metric parameter r += dist_op( d, cuvs::spatial::knn::detail::utils::mapping{}(data[e][v])); +#endif } } } @@ -233,19 +286,31 @@ _RAFT_DEVICE __noinline__ auto compute_distance_standard( args.dim, args.smem_ws_ptr); +#if defined(CUVS_ENABLE_JIT_LTO) || defined(BUILD_KERNEL) + // Normalization is JIT linked from fragments (no-op or cosine normalization) + // The planner links the appropriate fragment based on the metric + distance = + apply_normalization_standard(distance, args, dataset_index); +#else + // When JIT LTO is disabled, kMetric is always available as a compile-time constant if constexpr (DescriptorT::kMetric == cuvs::distance::DistanceType::CosineExpanded) { const auto* dataset_norms = DescriptorT::dataset_norms_ptr(args); auto norm = dataset_norms[dataset_index]; if (norm > 0) { distance = distance / norm; } } +#endif return distance; } #ifndef BUILD_KERNEL -// The init kernel is not needed when building JIT fragments (BUILD_KERNEL is defined) -// It's only needed for non-JIT initialization. When BUILD_KERNEL is defined, we're building -// a JIT fragment and don't want this kernel to be instantiated. +// The init kernel is used for both JIT and non-JIT initialization +// When BUILD_KERNEL is defined, we're building a JIT fragment and don't want this kernel. 
+// The kernel handles JIT vs non-JIT via ifdef internally template ; + standard_dataset_descriptor_t; using base_type = typename desc_type::base_type; -#ifdef CUVS_ENABLE_JIT_LTO - // For JIT, we don't use the function pointers, so set them to nullptr - // The free functions are called directly instead - new (out) desc_type(nullptr, // setup_workspace_impl - not used in JIT - nullptr, // compute_distance_impl - not used in JIT + // For CUDA 12 (non-JIT), set the function pointers properly + new (out) desc_type(reinterpret_cast( + &setup_workspace_standard), + reinterpret_cast( + &compute_distance_standard), ptr, size, dim, ld, dataset_norms); #else - // For CUDA 12 (non-JIT), set the function pointers properly - new (out) desc_type(reinterpret_cast( - &setup_workspace_standard), - reinterpret_cast( - &compute_distance_standard), + // When JIT LTO is enabled, Metric is not a template parameter + using desc_type = + standard_dataset_descriptor_t; + using base_type = typename desc_type::base_type; + + // For JIT, we don't use the function pointers, so set them to nullptr + // The free functions are called directly instead + new (out) desc_type(nullptr, // setup_workspace_impl - not used in JIT + nullptr, // compute_distance_impl - not used in JIT ptr, size, dim, @@ -290,9 +361,8 @@ RAFT_KERNEL __launch_bounds__(1, 1) #endif // #ifndef BUILD_KERNEL #ifndef BUILD_KERNEL -// The init_ function is not needed when building JIT fragments (BUILD_KERNEL is defined) -// It's only needed for non-JIT initialization. When BUILD_KERNEL is defined, we're building -// a JIT fragment and don't want this host function to be included. +// The init_ function is used for both JIT and non-JIT initialization +// When BUILD_KERNEL is defined, we're building a JIT fragment and don't want this function. 
template ; + using base_type = typename desc_type::base_type; +#else + // When JIT LTO is enabled, Metric is not a template parameter using desc_type = - standard_dataset_descriptor_t; + standard_dataset_descriptor_t; using base_type = typename desc_type::base_type; +#endif RAFT_EXPECTS(Metric != cuvs::distance::DistanceType::CosineExpanded || dataset_norms != nullptr, "Dataset norms must be provided for CosineExpanded metric"); diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh index 34839c8654..004aaf39b6 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh @@ -15,15 +15,19 @@ namespace cuvs::neighbors::cagra::detail { -template + typename DistanceT +#if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) + , + cuvs::distance::DistanceType Metric +#endif + > struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t { using base_type = dataset_descriptor_base_t; using CODE_BOOK_T = CodebookT; @@ -37,7 +41,9 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the cosine normalization function +// This fragment provides apply_normalization_standard that normalizes by dataset norm +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( + @distance_type@, const args_t, @index_type@); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) 
static void register_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + // This fragment provides apply_normalization_standard (cosine normalization version) + // The planner links the appropriate fragment (noop or cosine) based on metric + registerAlgorithm( + "apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@", + embedded_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_impl.cuh new file mode 100644 index 0000000000..38cf5a5de8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_impl.cuh @@ -0,0 +1,35 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_standard-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +// Cosine normalization fragment implementation +// This provides apply_normalization_standard that normalizes by dataset norm (for CosineExpanded +// metric) +template +__device__ DistanceT +apply_normalization_standard(DistanceT distance, + const typename cuvs::neighbors::cagra::detail:: + dataset_descriptor_base_t::args_t args, + IndexT dataset_index) +{ + // CosineExpanded normalization: divide by dataset norm + const auto* dataset_norms = + standard_dataset_descriptor_t:: + dataset_norms_ptr(args); + auto norm = dataset_norms[dataset_index]; + if (norm > 0) { distance = distance / norm; } + return distance; +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in new file mode 100644 index 0000000000..9b303d7420 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in @@ -0,0 +1,42 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the noop normalization function +// This fragment provides apply_normalization_standard that does nothing +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( + @distance_type@, const args_t, @index_type@); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + // This fragment provides apply_normalization_standard (no-op version) + // The planner links the appropriate fragment (noop or cosine) based on metric + registerAlgorithm( + "apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@", + embedded_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_impl.cuh new file mode 100644 index 0000000000..eee4b6fa79 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_impl.cuh @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_standard-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +// No-op normalization fragment implementation +// This provides apply_normalization_standard that does nothing (for non-CosineExpanded metrics) +template +__device__ DistanceT +apply_normalization_standard(DistanceT distance, + const typename cuvs::neighbors::cagra::detail:: + dataset_descriptor_base_t::args_t args, + IndexT dataset_index) +{ + // No normalization needed for non-CosineExpanded metrics + return distance; +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in index dbe22e09b4..31cfd6e010 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in @@ -12,9 +12,9 @@ namespace cuvs::neighbors::cagra::detail { // Instantiate the compute_distance_standard function for standard descriptor -using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; +// Note: Metric is no longer a template parameter - it's determined via JIT-linked dist_op fragments using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance_standard<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( +template __device__ @distance_type@ compute_distance_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( const args_t, @index_type@); } // namespace cuvs::neighbors::cagra::detail @@ -23,18 +23,18 @@ template __device__ @distance_type@ 
compute_distance_standard<@metric_cpp@, @tea #include #include -#include "compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +__attribute__((__constructor__)) static void register_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { registerAlgorithm( - "compute_distance_@metric_name@_t@team_size@_dim@dataset_block_dim@", - embedded_compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_compute_distance_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + "compute_distance_standard_t@team_size@_dim@dataset_block_dim@", + embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh index 7153596376..1beff009b4 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh @@ -16,8 +16,8 @@ namespace cuvs::neighbors::cagra::detail { // Extern function implementation for compute_distance_standard (standard descriptor) // Returns per-thread distance (team_sum must be called by the caller) -template ; + 
standard_dataset_descriptor_t; auto per_thread_distance = cuvs::neighbors::cagra::detail::compute_distance_standard(args, dataset_index); return per_thread_distance; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index 54eaefbd04..6bd2970f94 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -13,7 +13,8 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { // Instantiate the compute_distance_to_child_nodes_kernel_jit function with concrete descriptor type -using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; +// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; template __global__ void compute_distance_to_child_nodes_kernel_jit( const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, const desc_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); @@ -23,7 +24,7 @@ template __global__ void compute_distance_to_child_nodes_kernel_jit #include -#include "compute_distance_to_child_nodes_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "compute_distance_to_child_nodes_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace 
cuvs::neighbors::cagra::detail; @@ -33,9 +34,9 @@ __attribute__((__constructor__)) static void register_compute_distance_to_child_ tag_idx_@idx_abbrev@, tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@>( - "compute_distance_to_child_nodes_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@", - embedded_compute_distance_to_child_nodes_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_compute_distance_to_child_nodes_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + "compute_distance_to_child_nodes_kernel_t@team_size@_dim@dataset_block_dim@", + embedded_compute_distance_to_child_nodes_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_compute_distance_to_child_nodes_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in index 0d16b4a411..41dbfc4f67 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in @@ -13,7 +13,8 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { // Instantiate the compute_distance_to_child_nodes_kernel_jit function with concrete VPQ descriptor type -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; +// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, 
@distance_type@>; template __global__ void compute_distance_to_child_nodes_kernel_jit( const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, const desc_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); @@ -23,7 +24,7 @@ template __global__ void compute_distance_to_child_nodes_kernel_jit #include -#include "compute_distance_to_child_nodes_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "compute_distance_to_child_nodes_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -34,9 +35,9 @@ __attribute__((__constructor__)) static void register_compute_distance_to_child_ tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@, tag_codebook_half>( - "compute_distance_to_child_nodes_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_compute_distance_to_child_nodes_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_compute_distance_to_child_nodes_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + "compute_distance_to_child_nodes_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_compute_distance_to_child_nodes_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + 
sizeof(embedded_compute_distance_to_child_nodes_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in index 638791181d..18a36fb9ac 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in @@ -12,9 +12,10 @@ namespace cuvs::neighbors::cagra::detail { // Instantiate the compute_distance_vpq function for VPQ descriptor -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; +// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance_vpq<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( +template __device__ @distance_type@ compute_distance_vpq<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( const args_t, @index_type@); } // namespace cuvs::neighbors::cagra::detail @@ -23,19 +24,19 @@ template __device__ @distance_type@ compute_distance_vpq<@metric_cpp@, @team_siz #include #include -#include 
"compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +__attribute__((__constructor__)) static void register_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { registerAlgorithm( - "compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_compute_distance_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh index 700f193c99..560a360434 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl.cuh @@ -16,8 +16,8 @@ namespace cuvs::neighbors::cagra::detail { // Extern function implementation for compute_distance_vpq (VPQ 
descriptor) // Returns per-thread distance (team_sum must be called by the caller) -template ) { - // Standard descriptor - use the metric from the descriptor type itself - per_thread_norm2 = compute_distance_standard(args, seed_index); } else { - // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself - per_thread_norm2 = compute_distance_vpq) { - // Standard descriptor - use the metric from the descriptor type itself - per_thread_dist = compute_distance_standard(args, child_id); } else { - // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself - per_thread_dist = compute_distance_vpq + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the dist_op function for the specific metric +// Each fragment provides dist_op for a specific metric - planner links the appropriate one +template __device__ @distance_type@ dist_op<@data_type@, @distance_type@>(@data_type@, @data_type@); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "dist_op_@metric_tag@_@type_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_dist_op_@metric_tag@_@type_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "dist_op_@metric_tag@", + embedded_dist_op_@metric_tag@_@type_abbrev@_@dist_abbrev@, + sizeof(embedded_dist_op_@metric_tag@_@type_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh new file mode 100644 index 0000000000..aa29e475aa --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh @@ -0,0 +1,19 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include + +namespace cuvs::neighbors::cagra::detail { + +// dist_op fragment for CosineExpanded metric (same as InnerProduct) +template +__device__ DISTANCE_T dist_op(DATA_T a, DATA_T b) +{ + return -static_cast(a) * static_cast(b); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh new file mode 100644 index 0000000000..728970492f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh @@ -0,0 +1,22 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include + +namespace cuvs::neighbors::cagra::detail { + +// dist_op fragment for BitwiseHamming metric +template +__device__ DISTANCE_T dist_op(DATA_T a, DATA_T b) +{ + // mask the result of xor for the integer promotion + const auto v = (a ^ b) & 0xffu; + return __popc(v); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh new file mode 100644 index 0000000000..bc2446316d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh @@ -0,0 +1,19 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include + +namespace cuvs::neighbors::cagra::detail { + +// dist_op fragment for InnerProduct metric +template +__device__ DISTANCE_T dist_op(DATA_T a, DATA_T b) +{ + return -static_cast(a) * static_cast(b); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh new file mode 100644 index 0000000000..170d0969e9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh @@ -0,0 +1,20 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include + +namespace cuvs::neighbors::cagra::detail { + +// dist_op fragment for L2Expanded metric +template +__device__ DISTANCE_T dist_op(DATA_T a, DATA_T b) +{ + DISTANCE_T diff = a - b; + return diff * diff; +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh index b4455df0a3..5d412f3566 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -5,23 +5,24 @@ #pragma once -#include "../compute_distance.hpp" +// This file contains extern function declarations for JIT LTO +// The actual descriptor type definitions are in -impl.cuh files which are included +// directly by the .cu.in files with CUVS_ENABLE_JIT_LTO defined +// Forward declarations must match the JIT LTO version (no Metric parameter) + +#include "../compute_distance.hpp" // For dataset_descriptor_base_t #include -// Forward declarations of descriptor types (full definitions are in -impl.cuh files) -// This file is only included by JIT kernel headers which are included by 
.cu.in files -// The .cu.in files include the -impl.cuh files directly for full type definitions namespace cuvs::neighbors::cagra::detail { -template struct standard_dataset_descriptor_t; -template extern __device__ const - standard_dataset_descriptor_t* - setup_workspace_standard(const standard_dataset_descriptor_t* desc, - void* smem, - const DataT* queries, - uint32_t query_id); + standard_dataset_descriptor_t* + setup_workspace_standard( + const standard_dataset_descriptor_t* desc, + void* smem, + const DataT* queries, + uint32_t query_id); -template -extern __device__ const cagra_q_dataset_descriptor_t* -setup_workspace_vpq(const cagra_q_dataset_descriptor_t; +// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; template __global__ void random_pickup_kernel_jit( const desc_t*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); @@ -23,7 +24,7 @@ template __global__ void random_pickup_kernel_jit #include -#include "random_pickup_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "random_pickup_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -33,9 +34,9 @@ __attribute__((__constructor__)) static void register_random_pickup_kernel_@type tag_idx_@idx_abbrev@, tag_dist_@dist_abbrev@, tag_idx_@idx_abbrev@>( - "random_pickup_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@", - embedded_random_pickup_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - 
sizeof(embedded_random_pickup_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + "random_pickup_kernel_t@team_size@_dim@dataset_block_dim@", + embedded_random_pickup_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_random_pickup_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in index b2bd39feca..4acc085d2d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in @@ -13,7 +13,8 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { // Instantiate the random_pickup_kernel_jit function with concrete VPQ descriptor type -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; +// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; template __global__ void random_pickup_kernel_jit( const desc_t*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); @@ -23,7 +24,7 @@ template __global__ void random_pickup_kernel_jit #include -#include "random_pickup_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include 
"random_pickup_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -34,9 +35,9 @@ __attribute__((__constructor__)) static void register_random_pickup_kernel_vpq_@ tag_dist_@dist_abbrev@, tag_idx_@idx_abbrev@, tag_codebook_half>( - "random_pickup_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_random_pickup_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_random_pickup_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + "random_pickup_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_random_pickup_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_random_pickup_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index c14c04d9b3..3aae61b33b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -18,7 +18,8 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { // Instantiate the search_kernel_jit function with concrete descriptor type -using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; +// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, 
@data_type@, @index_type@, @distance_type@>; template __global__ __launch_bounds__(1024, 1) void search_kernel_jit( @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); @@ -28,7 +29,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit #include -#include "search_multi_cta_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_multi_cta_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -38,9 +39,9 @@ __attribute__((__constructor__)) static void register_search_multi_cta_kernel_@t tag_idx_@idx_abbrev@, tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@>( - "search_multi_cta_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@", - embedded_search_multi_cta_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_multi_cta_kernel_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + "search_multi_cta_kernel_t@team_size@_dim@dataset_block_dim@", + embedded_search_multi_cta_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_multi_cta_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh index 622e1fb6b0..5190da7725 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh @@ -125,17 +125,16 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( // Check if DescriptorT is a standard_dataset_descriptor_t by checking if it doesn't have kPqBits // (standard descriptors don't have kPqBits, VPQ descriptors do) if constexpr (!has_kpq_bits_v) { - // Standard descriptor - use the metric from the descriptor type itself - smem_desc = setup_workspace_standard(dataset_desc, smem, queries_ptr, query_id); } else { - // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself - smem_desc = setup_workspace_vpq; +// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; template __global__ __launch_bounds__(1024, 1) void search_kernel_jit( @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); @@ -27,7 +28,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit #include -#include 
"search_multi_cta_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_multi_cta_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -38,9 +39,9 @@ __attribute__((__constructor__)) static void register_search_multi_cta_kernel_vp tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@, tag_codebook_half>( - "search_multi_cta_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_search_multi_cta_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_multi_cta_kernel_vpq_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + "search_multi_cta_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_search_multi_cta_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_multi_cta_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index 86243b8168..188a5e481d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -54,17 +54,18 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { std::string key = "setup_workspace_"; if (is_vpq) { key += "vpq_"; + // Note: Metric is no longer in 
the key - VPQ only supports L2Expanded using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; } else { + // Standard dataset - Metric is no longer in the key, linked via dist_op and normalization + // fragments auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + key += "standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + params; } @@ -78,23 +79,69 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { uint32_t pq_bits = 0, uint32_t pq_len = 0) { - std::string key = "compute_distance_"; if (is_vpq) { - key += "vpq_"; + // VPQ: Metric is no longer in the key - VPQ only supports L2Expanded + std::string key = "compute_distance_vpq_"; using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; + this->device_functions.push_back(key); } else { - auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + // Standard: compute_distance_standard no longer has metric in the name + // Metric is handled via dist_op fragments + std::string key = "compute_distance_standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); + auto params = make_fragment_key(); key += "_" + params; + this->device_functions.push_back(key); + + // Add dist_op fragment 
for the metric + add_dist_op_device_function(metric); + + // Add normalization fragment (cosine or noop) + add_normalization_device_function(metric, team_size, dataset_block_dim); } + } + + void add_dist_op_device_function(cuvs::distance::DistanceType metric) + { + std::string metric_tag; + switch (metric) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2Unexpanded: metric_tag = "l2"; break; + case cuvs::distance::DistanceType::InnerProduct: metric_tag = "inner_product"; break; + case cuvs::distance::DistanceType::CosineExpanded: + metric_tag = "inner_product"; // CosineExpanded uses inner_product dist_op + break; + case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; + default: metric_tag = "unknown"; break; + } + auto params = make_fragment_key(); + std::string key = "dist_op_" + metric_tag + "_" + params; + this->device_functions.push_back(key); + } + + void add_normalization_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim) + { + // Both cosine and noop fragments provide the same function name "apply_normalization_standard" + // but register with different fragment names. The planner links the appropriate one based on + // metric. 
+ std::string normalization_type; + if (metric == cuvs::distance::DistanceType::CosineExpanded) { + normalization_type = "cosine"; + } else { + normalization_type = "noop"; + } + auto params = make_fragment_key(); + std::string key = "apply_normalization_standard_" + normalization_type; + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + params; this->device_functions.push_back(key); } @@ -113,9 +160,13 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { uint32_t pq_bits, uint32_t pq_len) { - std::string name = "search_multi_cta_kernel_"; - if (is_vpq) { name += "vpq_"; } - name += metric_to_string(metric); + std::string name = "search_multi_cta_kernel"; + if (is_vpq) { + name += "_vpq"; + // Note: Metric is no longer in VPQ kernel names - VPQ only supports L2Expanded + } + // Note: Metric is no longer in kernel names - it's linked via dist_op and normalization + // fragments name += "_t" + std::to_string(team_size); name += "_dim" + std::to_string(dataset_block_dim); if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh index a62ccc9c9b..b6082823ee 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh @@ -73,21 +73,16 @@ RAFT_KERNEL random_pickup_kernel_jit( // Check if DescriptorT is a standard_dataset_descriptor_t by checking if it doesn't have kPqBits // (standard descriptors don't have kPqBits, VPQ descriptors do) if constexpr (!has_kpq_bits_v) { - // Standard descriptor - use the metric from the descriptor type itself - // DescriptorT should already be standard_dataset_descriptor_t where Metric matches - // DescriptorT::kMetric - smem_desc = 
setup_workspace_standard(dataset_desc, smem, queries_ptr, query_id); } else { - // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself - // DescriptorT should already be cagra_q_dataset_descriptor_t where Metric matches - // DescriptorT::kMetric - smem_desc = setup_workspace_vpq) { - // Standard descriptor - use the metric from the descriptor type itself - per_thread_norm2 = compute_distance_standard(args, seed_index); } else { - // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself - per_thread_norm2 = compute_distance_vpq) { - // Standard descriptor - use the metric from the descriptor type itself - smem_desc = setup_workspace_standard(dataset_desc, smem, query_ptr, query_id); } else { - // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself - smem_desc = setup_workspace_vpq) { - // Standard descriptor - use the metric from the descriptor type itself - per_thread_norm2 = compute_distance_standard(args, child_id); } else { - // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor type itself - per_thread_norm2 = compute_distance_vpq(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; } else { + // Standard dataset - Metric is no longer in the key, linked via dist_op and normalization + // fragments auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + key += "standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + params; } @@ -81,23 +82,69 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { uint32_t pq_bits = 0, uint32_t pq_len = 0) { - std::string key = "compute_distance_"; if (is_vpq) { - key += "vpq_"; 
+ // VPQ: Metric is no longer in the key - VPQ only supports L2Expanded + std::string key = "compute_distance_vpq_"; using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; + this->device_functions.push_back(key); } else { - auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + // Standard: compute_distance_standard no longer has metric in the name + // Metric is handled via dist_op fragments + std::string key = "compute_distance_standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); + auto params = make_fragment_key(); key += "_" + params; + this->device_functions.push_back(key); + + // Add dist_op fragment for the metric + add_dist_op_device_function(metric); + + // Add normalization fragment (cosine or noop) + add_normalization_device_function(metric, team_size, dataset_block_dim); } + } + + void add_dist_op_device_function(cuvs::distance::DistanceType metric) + { + std::string metric_tag; + switch (metric) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2Unexpanded: metric_tag = "l2"; break; + case cuvs::distance::DistanceType::InnerProduct: metric_tag = "inner_product"; break; + case cuvs::distance::DistanceType::CosineExpanded: + metric_tag = "inner_product"; // CosineExpanded uses inner_product dist_op + break; + case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; + default: metric_tag = "unknown"; break; + } + auto params = make_fragment_key(); + std::string key = "dist_op_" + metric_tag + "_" + params; + this->device_functions.push_back(key); + } + + void 
add_normalization_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim) + { + // Both cosine and noop fragments provide the same function name "apply_normalization_standard" + // but register with different fragment names. The planner links the appropriate one based on + // metric. + std::string normalization_type; + if (metric == cuvs::distance::DistanceType::CosineExpanded) { + normalization_type = "cosine"; + } else { + normalization_type = "noop"; + } + auto params = make_fragment_key(); + std::string key = "apply_normalization_standard_" + normalization_type; + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + params; this->device_functions.push_back(key); } @@ -123,8 +170,12 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { if (kernel_name == "apply_filter_kernel") { return kernel_name; } std::string name = kernel_name; - if (is_vpq) { name += "_vpq"; } - name += "_" + metric_to_string(metric); + if (is_vpq) { + name += "_vpq"; + // Note: Metric is no longer in VPQ kernel names - VPQ only supports L2Expanded + } + // Note: Metric is no longer in kernel names - it's linked via dist_op and normalization + // fragments name += "_t" + std::to_string(team_size); name += "_dim" + std::to_string(dataset_block_dim); if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in index 286193c2f0..f1c99ee698 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -13,7 +13,8 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { // Instantiate the search_kernel_jit function with concrete descriptor type -using desc_t = 
cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t; +// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); @@ -23,7 +24,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_b #include #include -#include "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -33,9 +34,9 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_@ tag_idx_@idx_abbrev@, tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@>( - "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@", - 
embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@", + embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh index bcc1c7bc69..8e30192605 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -163,21 +163,16 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( // Check if DescriptorT is a standard_dataset_descriptor_t by checking if it doesn't have kPqBits // (standard descriptors don't have kPqBits, VPQ descriptors do) if constexpr (!has_kpq_bits_v) { - // Standard descriptor - use the metric from the descriptor type itself - // DescriptorT should already be standard_dataset_descriptor_t where Metric matches - // DescriptorT::kMetric - smem_desc = setup_workspace_standard(dataset_desc, smem, queries_ptr, query_id); } else { - // Must be cagra_q_dataset_descriptor_t - use the metric from the descriptor 
type itself - // DescriptorT should already be cagra_q_dataset_descriptor_t where Metric matches - // DescriptorT::kMetric - smem_desc = setup_workspace_vpq; +// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments +using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); @@ -23,7 +24,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by #include #include -#include "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -33,9 +34,9 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_p tag_idx_@idx_abbrev@, tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@>( - 
"search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@", - embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@", + embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in index 372aa4e9cb..c708b15920 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in @@ -13,7 +13,8 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { // Instantiate the search_kernel_p_jit function with concrete VPQ descriptor type -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; +// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, 
@pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); @@ -23,7 +24,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by #include #include -#include "search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -34,9 +35,9 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_p tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@, tag_codebook_half>( - "search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - 
embedded_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + "search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in index 462a3c3812..286b2b29b3 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in @@ -13,7 +13,8 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { // Instantiate the search_kernel_jit function with concrete VPQ descriptor type -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; +// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded +using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, 
@codebook_type@, @data_type@, @index_type@, @distance_type@>; template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); @@ -23,7 +24,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_b #include #include -#include "search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -34,9 +35,9 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_v tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@, tag_codebook_half>( - "search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - 
embedded_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@metric_name_full@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + "search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index ae40c4cb6f..d2445bdcca 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -71,19 +71,19 @@ struct CagraSearchPlanner : AlgorithmPlanner { if (is_vpq) { key += "vpq_"; // For VPQ, include codebook type tag in template parameters + // Note: Metric is no longer in the key - VPQ only supports L2Expanded using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; // Use template tags only for types, strings for integers/enums auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + key += "t" 
+ std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; } else { - // Use template tags only for types, strings for integers/enums + // Standard dataset - Metric is no longer in the key, linked via dist_op and normalization + // fragments Use template tags only for types, strings for integers/enums auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + key += "standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + params; } @@ -97,26 +97,69 @@ struct CagraSearchPlanner : AlgorithmPlanner { uint32_t pq_bits = 0, uint32_t pq_len = 0) { - std::string key = "compute_distance_"; if (is_vpq) { - key += "vpq_"; - // For VPQ, include codebook type tag in template parameters + // VPQ: Metric is no longer in the key - VPQ only supports L2Expanded + std::string key = "compute_distance_vpq_"; using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - // Use template tags only for types, strings for integers/enums - auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + auto params = make_fragment_key(); + key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; + this->device_functions.push_back(key); } else { - // Use template tags only for types, strings for integers/enums - auto params = make_fragment_key(); - key += metric_to_string(metric); - key += "_t" + std::to_string(team_size); + // Standard: compute_distance_standard no longer has metric in the name + // Metric is handled via dist_op fragments + std::string key = "compute_distance_standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); + auto 
params = make_fragment_key(); key += "_" + params; + this->device_functions.push_back(key); + + // Add dist_op fragment for the metric + add_dist_op_device_function(metric); + + // Add normalization fragment (cosine or noop) + add_normalization_device_function(metric, team_size, dataset_block_dim); + } + } + + void add_dist_op_device_function(cuvs::distance::DistanceType metric) + { + std::string metric_tag; + switch (metric) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2Unexpanded: metric_tag = "l2"; break; + case cuvs::distance::DistanceType::InnerProduct: metric_tag = "inner_product"; break; + case cuvs::distance::DistanceType::CosineExpanded: + metric_tag = "inner_product"; // CosineExpanded uses inner_product dist_op + break; + case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; + default: metric_tag = "unknown"; break; + } + auto params = make_fragment_key(); + std::string key = "dist_op_" + metric_tag + "_" + params; + this->device_functions.push_back(key); + } + + void add_normalization_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim) + { + // Both cosine and noop fragments provide the same function name "apply_normalization_standard" + // but register with different fragment names. The planner links the appropriate one based on + // metric. + std::string normalization_type; + if (metric == cuvs::distance::DistanceType::CosineExpanded) { + normalization_type = "cosine"; + } else { + normalization_type = "noop"; } + auto params = make_fragment_key(); + std::string key = "apply_normalization_standard_" + normalization_type; + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + params; this->device_functions.push_back(key); } @@ -137,11 +180,15 @@ struct CagraSearchPlanner : AlgorithmPlanner { bool persistent) { std::string name = (persistent ? 
"search_single_cta_kernel_p_" : "search_single_cta_kernel_"); - if (is_vpq) { name += "vpq_"; } + if (is_vpq) { + name += "vpq_"; + // Note: Metric is no longer in VPQ kernel names - VPQ only supports L2Expanded + } name += bool_to_string(topk_by_bitonic_sort) + "_"; name += bool_to_string(bitonic_sort_and_merge_multi_warps) + "_"; - name += metric_to_string(metric); - name += "_t" + std::to_string(team_size); + // Note: Metric is no longer in kernel names - it's linked via dist_op and normalization + // fragments + name += "t" + std::to_string(team_size); name += "_dim" + std::to_string(dataset_block_dim); if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } return name; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in index 1eeac8c6ba..7afb69cb1e 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in @@ -12,7 +12,8 @@ namespace cuvs::neighbors::cagra::detail { // Instantiate the setup_workspace_standard function for standard descriptor -template __device__ const cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>* setup_workspace_standard<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>(const cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); +// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments +template __device__ const cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, 
@distance_type@>* setup_workspace_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>(const cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail @@ -20,18 +21,18 @@ template __device__ const cuvs::neighbors::cagra::detail::standard_dataset_descr #include #include -#include "setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +__attribute__((__constructor__)) static void register_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { registerAlgorithm( - "setup_workspace_@metric_name@_t@team_size@_dim@dataset_block_dim@", - embedded_setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_setup_workspace_standard_@metric_name@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@", + embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh index 30956a17be..39280ca238 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh @@ -18,18 +18,18 @@ namespace cuvs::neighbors::cagra::detail { // Takes the concrete descriptor pointer and calls the free function directly (not through function // pointer) For JIT LTO, the descriptor's setup_workspace_impl is nullptr, so we must call the free // function directly -template __device__ const cuvs::neighbors::cagra::detail:: - standard_dataset_descriptor_t* + standard_dataset_descriptor_t* setup_workspace_standard( const cuvs::neighbors::cagra::detail:: - standard_dataset_descriptor_t* - desc, + standard_dataset_descriptor_t* desc, void* smem, const DataT* queries, uint32_t query_id) @@ -39,7 +39,7 @@ __device__ const cuvs::neighbors::cagra::detail:: // Call the free function directly (not desc->setup_workspace() which uses a function pointer) // The free function is in compute_distance_standard-impl.cuh using desc_t = cuvs::neighbors::cagra::detail:: - standard_dataset_descriptor_t; + standard_dataset_descriptor_t; const auto* result = cuvs::neighbors::cagra::detail::setup_workspace_standard(desc, smem, queries, query_id); return result; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in index c81268e2c5..dc87a44e02 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in @@ -12,7 +12,8 @@ namespace cuvs::neighbors::cagra::detail { // Instantiate the setup_workspace_vpq function for VPQ descriptor -template __device__ const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>* setup_workspace_vpq<@metric_cpp@, @team_size@, 
@dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@metric_cpp@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); +// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded +template __device__ const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>* setup_workspace_vpq<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail @@ -20,19 +21,19 @@ template __device__ const cuvs::neighbors::cagra::detail::cagra_q_dataset_descri #include #include -#include "setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +__attribute__((__constructor__)) static void register_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { registerAlgorithm( - "setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - 
embedded_setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_setup_workspace_vpq_@metric_name@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + "setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh index e38f3ecbe8..0c300911bf 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh @@ -18,8 +18,8 @@ namespace cuvs::neighbors::cagra::detail { // Takes the concrete descriptor pointer and calls the free function directly (not through function // pointer) For JIT LTO, the descriptor's setup_workspace_impl is nullptr, so we must call the free // function directly -template -__device__ const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t* setup_workspace_vpq( - const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_tsetup_workspace() which uses a function pointer) // The free function is in compute_distance_vpq-impl.cuh - using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t= min_iterations) { if (terminate_flag.data() == nullptr) { - RAFT_FAIL("terminate_flag.data() is NULL before set_value at iteration %zu", iter + 1); + RAFT_FAIL("terminate_flag.data() is NULL before set_value at iteration %u", iter + 1); } set_value(terminate_flag.data(), 1, stream); } @@ -941,18 +941,18 @@ 
struct search // Validate all pointers before passing to kernel to prevent memory corruption if (terminate_flag.data() == nullptr) { - RAFT_FAIL("terminate_flag.data() is NULL before pickup_next_parents at iteration %zu", + RAFT_FAIL("terminate_flag.data() is NULL before pickup_next_parents at iteration %u", iter + 1); } if (result_indices.data() == nullptr) { - RAFT_FAIL("result_indices.data() is NULL before pickup_next_parents at iteration %zu", + RAFT_FAIL("result_indices.data() is NULL before pickup_next_parents at iteration %u", iter + 1); } if (hashmap.data() == nullptr) { - RAFT_FAIL("hashmap.data() is NULL before pickup_next_parents at iteration %zu", iter + 1); + RAFT_FAIL("hashmap.data() is NULL before pickup_next_parents at iteration %u", iter + 1); } if (parent_node_list.data() == nullptr) { - RAFT_FAIL("parent_node_list.data() is NULL before pickup_next_parents at iteration %zu", + RAFT_FAIL("parent_node_list.data() is NULL before pickup_next_parents at iteration %u", iter + 1); } @@ -972,7 +972,7 @@ struct search // termination (2) if (iter + 1 >= min_iterations) { if (terminate_flag.data() == nullptr) { - RAFT_FAIL("terminate_flag.data() is NULL at iteration %zu", iter + 1); + RAFT_FAIL("terminate_flag.data() is NULL at iteration %u", iter + 1); } if (get_value(terminate_flag.data(), stream)) { iter++; From 07a158c5126f8a005f5e48d8b79a53d0c9ff050a Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 19 Feb 2026 17:58:45 +0000 Subject: [PATCH 110/158] use void* for desc and create more fragments --- .../modules/generate_jit_lto_kernels.cmake | 322 +++++++++++++++++- .../jit_lto_kernels/compute_distance_impl.cuh | 28 ++ ...compute_distance_standard_impl_unified.cuh | 37 ++ .../compute_distance_standard_unified.cu.in | 42 +++ ...mpute_distance_to_child_nodes_kernel.cu.in | 23 +- ...e_distance_to_child_nodes_kernel_vpq.cu.in | 43 --- .../compute_distance_vpq_impl_unified.cuh | 44 +++ .../compute_distance_vpq_unified.cu.in | 43 +++ 
.../descriptor_accessors_standard_impl.cuh | 110 ++++++ .../descriptor_accessors_vpq_impl.cuh | 151 ++++++++ .../jit_lto_kernels/device_common_jit.cuh | 139 ++++---- .../extern_device_functions.cuh | 99 +++++- .../jit_lto_kernels/get_args_standard.cu.in | 39 +++ .../cagra/jit_lto_kernels/get_args_vpq.cu.in | 39 +++ .../jit_lto_kernels/get_dim_standard.cu.in | 38 +++ .../cagra/jit_lto_kernels/get_dim_vpq.cu.in | 38 +++ .../jit_lto_kernels/get_size_standard.cu.in | 38 +++ .../cagra/jit_lto_kernels/get_size_vpq.cu.in | 38 +++ .../get_smem_ws_size_in_bytes_standard.cu.in | 38 +++ .../get_smem_ws_size_in_bytes_vpq.cu.in | 38 +++ ...eam_size_bitshift_from_smem_standard.cu.in | 38 +++ ...get_team_size_bitshift_from_smem_vpq.cu.in | 38 +++ .../get_team_size_bitshift_standard.cu.in | 38 +++ .../get_team_size_bitshift_vpq.cu.in | 38 +++ .../random_pickup_kernel.cu.in | 23 +- .../random_pickup_kernel_vpq.cu.in | 43 --- .../search_multi_cta_kernel.cu.in | 25 +- .../search_multi_cta_kernel_jit.cuh | 131 +++---- .../search_multi_cta_kernel_vpq.cu.in | 47 --- .../search_multi_cta_planner.hpp | 74 ++++ .../search_multi_kernel_jit.cuh | 222 ++++++------ .../search_multi_kernel_planner.hpp | 81 +++++ .../search_single_cta_kernel.cu.in | 21 +- .../search_single_cta_kernel_jit.cuh | 159 +++++---- .../search_single_cta_kernel_p.cu.in | 21 +- .../search_single_cta_kernel_p_vpq.cu.in | 43 --- .../search_single_cta_kernel_vpq.cu.in | 43 --- .../search_single_cta_planner.hpp | 79 ++++- .../jit_lto_kernels/setup_workspace_impl.cuh | 28 ++ .../setup_workspace_standard.cu.in | 11 +- .../setup_workspace_standard_impl_unified.cuh | 43 +++ .../jit_lto_kernels/setup_workspace_vpq.cu.in | 11 +- .../setup_workspace_vpq_impl_unified.cuh | 50 +++ .../search_multi_cta_kernel_launcher_jit.cuh | 7 + .../detail/cagra/search_multi_kernel.cuh | 9 +- .../search_multi_kernel_launcher_jit.cuh | 31 ++ .../search_single_cta_kernel_launcher_jit.cuh | 12 + 47 files changed, 2152 insertions(+), 601 deletions(-) 
create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl_unified.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_standard_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_vpq_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_standard.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in delete 
mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl_unified.cuh diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index eb050c4de1..46c47ed683 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -292,7 +292,7 @@ function(generate_jit_lto_kernels target) set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in" "${filename}" @ONLY ) @@ -307,6 +307,255 @@ function(generate_jit_lto_kernels target) endforeach() endforeach() + # Generate descriptor accessor fragments for standard descriptors These fragments provide get_dim, + # get_size, get_team_size_bitshift, get_args, get_smem_ws_size_in_bytes + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + # get_dim_standard + set(kernel_name + 
"get_dim_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_size_standard + set(kernel_name + "get_size_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_team_size_bitshift_standard + set(kernel_name + "get_team_size_bitshift_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + 
FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_team_size_bitshift_from_smem_standard + set(kernel_name + "get_team_size_bitshift_from_smem_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_args_standard + set(kernel_name + "get_args_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_args_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_smem_ws_size_in_bytes_standard + set(kernel_name + "get_smem_ws_size_in_bytes_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + + # Generate descriptor accessor fragments for VPQ descriptors + foreach(data_idx IN ITEMS 0 1 2 3) + list(GET cagra_data_types ${data_idx} data_type) + list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) + foreach(team_size IN LISTS cagra_team_sizes) + foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) + foreach(pq_len IN LISTS cagra_pq_lens) + # get_dim_vpq + set(kernel_name + "get_dim_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(pq_bits "${cagra_pq_bits}") + set(codebook_type "${cagra_codebook_type}") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_size_vpq + set(kernel_name + "get_size_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename 
"${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_team_size_bitshift_vpq + set(kernel_name + "get_team_size_bitshift_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_team_size_bitshift_from_smem_vpq + set(kernel_name + "get_team_size_bitshift_from_smem_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_args_vpq + set(kernel_name + 
"get_args_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + + # get_smem_ws_size_in_bytes_vpq + set(kernel_name + "get_smem_ws_size_in_bytes_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() + endforeach() + endforeach() + endforeach() + # Generate dist_op fragments for each metric Note: InnerProduct and CosineExpanded both use # inner_product dist_op, so we only generate it once foreach(data_idx IN ITEMS 0 1 2 3) @@ -444,7 +693,7 @@ function(generate_jit_lto_kernels target) set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in" "${filename}" 
@ONLY ) @@ -498,6 +747,12 @@ function(generate_jit_lto_kernels target) ) set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -524,6 +779,12 @@ function(generate_jit_lto_kernels target) ) set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -567,10 +828,11 @@ function(generate_jit_lto_kernels target) # CAGRA only uses uint32_t as SourceIndexT set(source_index_type "uint32_t") set(src_idx_abbrev "ui") - # Regular VPQ kernel entrypoint Note: Metric is no longer in the kernel name - VPQ - # only supports L2Expanded + # Regular VPQ kernel entrypoint Note: "vpq" is no longer in the kernel name - PQ + # parameters distinguish VPQ Metric is no longer in the kernel name - VPQ only + # supports L2Expanded set(kernel_name - "search_single_cta_kernel_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" @@ -582,12 +844,15 @@ function(generate_jit_lto_kernels target) set(pq_bits "${cagra_pq_bits}") set(pq_len "${pq_len}") 
set(codebook_type "${cagra_codebook_type}") + set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") + set(pq_prefix "") + set(codebook_tag ", tag_codebook_half") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in" "${filename}" @ONLY ) @@ -599,10 +864,11 @@ function(generate_jit_lto_kernels target) EMBEDDED_ARRAY "embedded_${kernel_name}" ) - # Persistent VPQ kernel entrypoint Note: Metric is no longer in the kernel name - VPQ - # only supports L2Expanded + # Persistent VPQ kernel entrypoint Note: "vpq" is no longer in the kernel name - PQ + # parameters distinguish VPQ Metric is no longer in the kernel name - VPQ only + # supports L2Expanded set(kernel_name - "search_single_cta_kernel_p_vpq_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" @@ -614,12 +880,15 @@ function(generate_jit_lto_kernels target) set(pq_bits "${cagra_pq_bits}") set(pq_len "${pq_len}") set(codebook_type "${cagra_codebook_type}") + set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") + set(pq_prefix "") + set(codebook_tag ", tag_codebook_half") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev 
"${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in" "${filename}" @ONLY ) @@ -659,6 +928,12 @@ function(generate_jit_lto_kernels target) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -704,12 +979,15 @@ function(generate_jit_lto_kernels target) set(pq_bits "${cagra_pq_bits}") set(pq_len "${pq_len}") set(codebook_type "${cagra_codebook_type}") + set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") + set(pq_prefix "_vpq") + set(codebook_tag ", tag_codebook_half") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in" "${filename}" @ONLY ) @@ -740,6 +1018,12 @@ function(generate_jit_lto_kernels target) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -767,6 
+1051,12 @@ function(generate_jit_lto_kernels target) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") set(team_size "${team_size}") set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -808,12 +1098,15 @@ function(generate_jit_lto_kernels target) set(pq_bits "${cagra_pq_bits}") set(pq_len "${pq_len}") set(codebook_type "${cagra_codebook_type}") + set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") + set(pq_prefix "_vpq") + set(codebook_tag ", tag_codebook_half") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in" "${filename}" @ONLY ) @@ -840,12 +1133,15 @@ function(generate_jit_lto_kernels target) set(pq_bits "${cagra_pq_bits}") set(pq_len "${pq_len}") set(codebook_type "${cagra_codebook_type}") + set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") + set(pq_prefix "_vpq") + set(codebook_tag ", tag_codebook_half") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in" "${filename}" @ONLY ) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_impl.cuh 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_impl.cuh new file mode 100644 index 0000000000..05ac642eac --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_impl.cuh @@ -0,0 +1,28 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance.hpp" // For dataset_descriptor_base_t definition +#include "../device_common.hpp" + +namespace cuvs::neighbors::cagra::detail { + +// Unified compute_distance function - takes void* args and template parameters +// Standard and VPQ versions are in separate impl headers but use the same function name +// The planner links the appropriate fragment at runtime based on PQ_BITS/PQ_LEN +template +extern __device__ DistanceT +compute_distance(const typename dataset_descriptor_base_t::args_t args, + IndexT dataset_index); + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl_unified.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl_unified.cuh new file mode 100644 index 0000000000..5c6d68948c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl_unified.cuh @@ -0,0 +1,37 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_standard-impl.cuh" +#include "../device_common.hpp" // For dataset_descriptor_base_t +#include "compute_distance_impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +// Unified compute_distance implementation for standard descriptors +// This is instantiated when PQ_BITS=0, PQ_LEN=0, CodebookT=void +template +__device__ DistanceT +compute_distance(const typename dataset_descriptor_base_t::args_t args, + IndexT dataset_index) +{ + // For standard descriptors, PQ_BITS=0, PQ_LEN=0, CodebookT=void + static_assert(PQ_BITS == 0 && PQ_LEN == 0 && std::is_same_v, + "Standard descriptor requires PQ_BITS=0, PQ_LEN=0, CodebookT=void"); + + // Reconstruct the descriptor type and call compute_distance_standard + using desc_t = standard_dataset_descriptor_t; + return compute_distance_standard(args, dataset_index); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in new file mode 100644 index 0000000000..96faf933b3 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in @@ -0,0 +1,42 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the unified compute_distance function for standard descriptor +// PQ_BITS=0, PQ_LEN=0, CodebookT=void for standard descriptors +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>( + const args_t, @index_type@); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + // This fragment provides compute_distance (standard version) + // The planner links the appropriate fragment (standard or VPQ) based on descriptor type + registerAlgorithm( + "compute_distance_standard_t@team_size@_dim@dataset_block_dim@", + embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index 6bd2970f94..f8bea170a6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -8,15 +8,16 @@ #ifdef BUILD_KERNEL #include -#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { 
-// Instantiate the compute_distance_to_child_nodes_kernel_jit function with concrete descriptor type +// Instantiate the compute_distance_to_child_nodes_kernel_jit function with unified template parameters +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; -template __global__ void compute_distance_to_child_nodes_kernel_jit( - const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, const desc_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); +// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +template __global__ void compute_distance_to_child_nodes_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@, cuvs::neighbors::filtering::none_sample_filter>( + const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, void*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search @@ -24,19 +25,19 @@ template __global__ void compute_distance_to_child_nodes_kernel_jit #include -#include 
"compute_distance_to_child_nodes_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_compute_distance_to_child_nodes_kernel_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() +__attribute__((__constructor__)) static void register_compute_distance_to_child_nodes_kernel@pq_prefix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() { registerAlgorithm( - "compute_distance_to_child_nodes_kernel_t@team_size@_dim@dataset_block_dim@", - embedded_compute_distance_to_child_nodes_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_compute_distance_to_child_nodes_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + tag_idx_@src_idx_abbrev@@codebook_tag@>( + "compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", + embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in deleted file mode 100644 index 41dbfc4f67..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_vpq.cu.in +++ /dev/null @@ -1,43 +0,0 @@ -/* - * SPDX-FileCopyrightText: 
Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -#include -#include - -namespace cuvs::neighbors::cagra::detail::multi_kernel_search { - -// Instantiate the compute_distance_to_child_nodes_kernel_jit function with concrete VPQ descriptor type -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; -template __global__ void compute_distance_to_child_nodes_kernel_jit( - const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, const desc_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); - -} // namespace cuvs::neighbors::cagra::detail::multi_kernel_search - -#else - -#include -#include -#include "compute_distance_to_child_nodes_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_compute_distance_to_child_nodes_kernel_vpq_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() -{ - registerAlgorithm( - "compute_distance_to_child_nodes_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_compute_distance_to_child_nodes_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - 
sizeof(embedded_compute_distance_to_child_nodes_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh new file mode 100644 index 0000000000..e43510b1f2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh @@ -0,0 +1,44 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_vpq-impl.cuh" +#include "../device_common.hpp" // For dataset_descriptor_base_t +#include "compute_distance_impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +// Unified compute_distance implementation for VPQ descriptors +// This is instantiated when PQ_BITS>0, PQ_LEN>0, CodebookT=half +template +__device__ DistanceT +compute_distance(const typename dataset_descriptor_base_t::args_t args, + IndexT dataset_index) +{ + // For VPQ descriptors, PQ_BITS>0, PQ_LEN>0, CodebookT=half + static_assert(PQ_BITS > 0 && PQ_LEN > 0 && std::is_same_v, + "VPQ descriptor requires PQ_BITS>0, PQ_LEN>0, CodebookT=half"); + + // Reconstruct the descriptor type and call compute_distance_vpq + using desc_t = cagra_q_dataset_descriptor_t; + return compute_distance_vpq(args, dataset_index); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in new file mode 100644 index 0000000000..d89dfeae67 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in @@ -0,0 +1,43 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the unified compute_distance function for VPQ descriptor +// PQ_BITS>0, PQ_LEN>0, CodebookT=half for VPQ descriptors +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( + const args_t, @index_type@); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + // This fragment provides compute_distance (VPQ version) + // The planner links the appropriate fragment (standard or VPQ) based on descriptor type + registerAlgorithm( + "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_standard_impl.cuh new file mode 100644 index 0000000000..7001c630ae --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_standard_impl.cuh @@ -0,0 +1,110 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_standard-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +// Descriptor accessor fragments for standard descriptors +// These take void* and reconstruct the descriptor pointer, then return the member +// Same function names as VPQ versions - planner links the right fragment at runtime +// Uses unified template parameters (PQ_BITS=0, PQ_LEN=0, CodebookT=void for standard descriptors) + +template +__device__ uint32_t get_dim(void* desc_ptr) +{ + using desc_t = standard_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + return desc->args.dim; +} + +template +__device__ IndexT get_size(void* desc_ptr) +{ + using desc_t = standard_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + return desc->size; +} + +template +__device__ uint32_t get_team_size_bitshift(void* desc_ptr) +{ + using desc_t = standard_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + // Use team_size_bitshift() which works for both global and shared memory descriptors + // team_size_bitshift_from_smem() only works when descriptor is in shared memory + return desc->team_size_bitshift(); +} + +template +__device__ uint32_t get_team_size_bitshift_from_smem(void* desc_ptr) +{ + using desc_t = standard_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + // Use team_size_bitshift_from_smem() which is optimized for shared memory access + return desc->team_size_bitshift_from_smem(); +} + +template +__device__ typename dataset_descriptor_base_t::args_t get_args( + void* desc_ptr) +{ + using desc_t = standard_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + return desc->args.load(); +} + +template +__device__ 
uint32_t get_smem_ws_size_in_bytes(void* desc_ptr, uint32_t dim) +{ + using desc_t = standard_dataset_descriptor_t; + return desc_t::get_smem_ws_size_in_bytes(dim); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_vpq_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_vpq_impl.cuh new file mode 100644 index 0000000000..f70341cb03 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_vpq_impl.cuh @@ -0,0 +1,151 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +// Descriptor accessor fragments for VPQ descriptors +// These take void* and reconstruct the descriptor pointer, then return the member +// Same function names as standard versions - planner links the right fragment at runtime + +template +__device__ uint32_t get_dim(void* desc_ptr) +{ + using desc_t = cagra_q_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + return desc->args.dim; +} + +template +__device__ IndexT get_size(void* desc_ptr) +{ + using desc_t = cagra_q_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + return desc->size; +} + +template +__device__ uint32_t get_team_size_bitshift(void* desc_ptr) +{ + using desc_t = cagra_q_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + // Use team_size_bitshift() which works for both global and shared memory descriptors + // team_size_bitshift_from_smem() only works when descriptor is in shared memory + return desc->team_size_bitshift(); +} + +template +__device__ uint32_t get_team_size_bitshift_from_smem(void* desc_ptr) +{ + using desc_t = cagra_q_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + // Use team_size_bitshift_from_smem() 
which is optimized for shared memory access + return desc->team_size_bitshift_from_smem(); +} + +template +__device__ typename dataset_descriptor_base_t::args_t get_args( + void* desc_ptr) +{ + using desc_t = cagra_q_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + return desc->args.load(); +} + +template +__device__ uint32_t get_smem_ws_size_in_bytes(void* desc_ptr, uint32_t dim) +{ + using desc_t = cagra_q_dataset_descriptor_t; + return desc_t::get_smem_ws_size_in_bytes(dim); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh index 37aae16793..91be40c64f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh @@ -30,16 +30,21 @@ struct has_kpq_bits { template inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; -// JIT version of compute_distance_to_random_nodes - uses extern functions +// JIT version of compute_distance_to_random_nodes - uses extern functions with void* descriptor // Shared between single_cta and multi_cta JIT kernels -template RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( IndexT* __restrict__ result_indices_ptr, // [num_pickup] DistanceT* __restrict__ result_distances_ptr, // [num_pickup] - const DescriptorT* smem_desc, // Concrete descriptor type from template + void* smem_desc, // void* descriptor pointer (reconstructed in fragments) const uint32_t num_pickup, const uint32_t num_distilation, const uint64_t rand_xor_mask, @@ -53,13 +58,29 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( const uint32_t num_blocks = 1) { constexpr unsigned warp_size = 32; - const auto team_size_bits = smem_desc->team_size_bitshift_from_smem(); - const auto max_i = raft::round_up_safe(num_pickup, warp_size >> team_size_bits); - // 
Load args once for better performance (avoid repeated loads in the loop) + // Get team_size_bits and args using accessor fragments + // Planner links the right fragment (standard or VPQ) at runtime based on descriptor type using args_t = typename cuvs::neighbors::cagra::detail:: dataset_descriptor_base_t::args_t; - const args_t args = smem_desc->args.load(); + + // Use get_team_size_bitshift_from_smem since smem_desc is in shared memory + uint32_t team_size_bits = get_team_size_bitshift_from_smem(smem_desc); + args_t args = + get_args( + smem_desc); + IndexT dataset_size = + get_size( + smem_desc); + + const auto max_i = raft::round_up_safe(num_pickup, warp_size >> team_size_bits); for (uint32_t i = threadIdx.x >> team_size_bits; i < max_i; i += (blockDim.x >> team_size_bits)) { const bool valid_i = (i < num_pickup); @@ -74,35 +95,24 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( if (seed_ptr && (gid < num_seeds)) { seed_index = seed_ptr[gid]; } else { - seed_index = device::xorshift64(gid ^ rand_xor_mask) % smem_desc->size; + seed_index = device::xorshift64(gid ^ rand_xor_mask) % dataset_size; } } // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum - // Otherwise warp shuffles will hang. Each thread calls the extern function to get + // Otherwise warp shuffles will hang. Each thread calls the unified extern function to get // its per-thread distance, then team_sum reduces across all threads in the team. 
DistanceT per_thread_norm2 = 0; if (valid_i) { - if constexpr (!has_kpq_bits_v) { - // Standard descriptor - Metric is no longer a template parameter, linked via dist_op and - // normalization fragments - per_thread_norm2 = compute_distance_standard(args, seed_index); - } else { - // Must be cagra_q_dataset_descriptor_t - VPQ only supports L2Expanded, so Metric is not - // needed - per_thread_norm2 = compute_distance_vpq(args, seed_index); - } + // Use unified compute_distance function (links standard or VPQ fragment at runtime) + per_thread_norm2 = compute_distance(args, seed_index); } // Now ALL threads in the team participate in team_sum const auto norm2_sum = device::team_sum(per_thread_norm2, team_size_bits); @@ -134,9 +144,14 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( } } -// JIT version of compute_distance_to_child_nodes - uses extern functions +// JIT version of compute_distance_to_child_nodes - uses extern functions with void* descriptor // Shared between single_cta and multi_cta JIT kernels -template team_size_bitshift_from_smem(); - const auto num_k = knn_k * search_width; - const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bits); - const bool lead_lane = (threadIdx.x & ((1u << team_size_bits) - 1u)) == 0; - const uint32_t ofst = STATIC_RESULT_POSITION ? 
0 : result_position[0]; - // Load args once for better performance (avoid repeated loads in the loop) + // Get team_size_bits and args using accessor fragments + // Planner links the right fragment (standard or VPQ) at runtime based on descriptor type using args_t = typename cuvs::neighbors::cagra::detail:: dataset_descriptor_base_t::args_t; - const args_t args = smem_desc->args.load(); + + // Use get_team_size_bitshift_from_smem since smem_desc is in shared memory + uint32_t team_size_bits = get_team_size_bitshift_from_smem(smem_desc); + args_t args = + get_args( + smem_desc); + + const auto num_k = knn_k * search_width; + const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bits); + const bool lead_lane = (threadIdx.x & ((1u << team_size_bits) - 1u)) == 0; + const uint32_t ofst = STATIC_RESULT_POSITION ? 0 : result_position[0]; for (uint32_t i = threadIdx.x >> team_size_bits; i < max_i; i += blockDim.x >> team_size_bits) { const auto j = i + ofst; @@ -206,30 +234,19 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( const auto child_id = valid_i ? result_child_indices_ptr[j] : invalid_index; // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum - // Otherwise warp shuffles will hang. Each thread calls the extern function to get + // Otherwise warp shuffles will hang. Each thread calls the unified extern function to get // its per-thread distance, then team_sum reduces across all threads in the team. 
DistanceT per_thread_dist = 0; if (child_id != invalid_index) { - if constexpr (!has_kpq_bits_v) { - // Standard descriptor - Metric is no longer a template parameter, linked via dist_op and - // normalization fragments - per_thread_dist = compute_distance_standard(args, child_id); - } else { - // Must be cagra_q_dataset_descriptor_t - VPQ only supports L2Expanded, so Metric is not - // needed - per_thread_dist = compute_distance_vpq(args, child_id); - } + // Use unified compute_distance function (links standard or VPQ fragment at runtime) + per_thread_dist = compute_distance(args, child_id); } else { // Invalid child_id: lead lane gets upper_bound, others get 0 per_thread_dist = lead_lane ? raft::upper_bound() : 0; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh index 5d412f3566..e57a5ebf06 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -38,9 +38,102 @@ namespace cuvs::neighbors::cagra::detail { // All extern function declarations are in the cuvs::neighbors::cagra::detail namespace // so they can be used by all search modes without being beholden to any specific sub-namespace -// Standard descriptor extern functions -// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization -// fragments +// Descriptor accessor extern functions (standard and VPQ versions use same function names) +// These take void* and reconstruct the descriptor inside +// Planner links the right fragment (standard or VPQ) at runtime based on descriptor type +// Uses unified template parameters: for standard descriptors, PQ_BITS=0, PQ_LEN=0, CodebookT=void +template +extern __device__ uint32_t get_dim(void* desc_ptr); + +template +extern __device__ IndexT get_size(void* desc_ptr); + +template +extern __device__ uint32_t 
get_team_size_bitshift(void* desc_ptr); + +template +extern __device__ uint32_t get_team_size_bitshift_from_smem(void* desc_ptr); + +template +extern __device__ typename dataset_descriptor_base_t::args_t get_args( + void* desc_ptr); + +template +extern __device__ uint32_t get_smem_ws_size_in_bytes(void* desc_ptr, uint32_t dim); + +// Unified setup_workspace and compute_distance extern functions +// These take void* and reconstruct the descriptor inside +// Standard and VPQ versions are in separate impl headers but use the same function name +template +extern __device__ void* setup_workspace(void* desc_ptr, + void* smem, + const DataT* queries, + uint32_t query_id); + +template +extern __device__ DistanceT +compute_distance(const typename dataset_descriptor_base_t::args_t args, + IndexT dataset_index); + +// Standard descriptor extern functions (kept for backward compatibility, but prefer unified +// versions) Note: Metric is no longer a template parameter - it's linked via dist_op and +// normalization fragments template + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_args function for standard descriptor +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ args_t get_args<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_args_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_args_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_args_standard_t@team_size@_dim@dataset_block_dim@", + 
embedded_get_args_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_args_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in new file mode 100644 index 0000000000..dbe6fef376 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in @@ -0,0 +1,39 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_args function for VPQ descriptor +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ args_t get_args<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in new file mode 100644 index 0000000000..a27c7c78ac --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_dim function for standard descriptor +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +template __device__ uint32_t get_dim<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_dim_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_dim_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_dim_standard_t@team_size@_dim@dataset_block_dim@", + embedded_get_dim_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_dim_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in new file mode 100644 index 0000000000..61db38311f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. 
Do not edit manually. + +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_dim function for VPQ descriptor +template __device__ uint32_t get_dim<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in new file mode 100644 index 0000000000..7ab5f9f702 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_size function for standard descriptor +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +template __device__ @index_type@ get_size<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_size_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_size_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_size_standard_t@team_size@_dim@dataset_block_dim@", + embedded_get_size_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_size_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in new file mode 100644 index 0000000000..18c5610048 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_size function for VPQ descriptor +template __device__ @index_type@ get_size<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in new file mode 100644 index 0000000000..7ae94bf363 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_smem_ws_size_in_bytes function for standard descriptor +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +template __device__ uint32_t get_smem_ws_size_in_bytes<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*, uint32_t); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@", + embedded_get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in new file mode 100644 index 0000000000..b4332a0fc2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_smem_ws_size_in_bytes function for VPQ descriptor +template __device__ uint32_t get_smem_ws_size_in_bytes<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*, uint32_t); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in new file mode 100644 index 0000000000..21410aee3e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_team_size_bitshift_from_smem function for standard descriptor +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +template __device__ uint32_t get_team_size_bitshift_from_smem<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@", + embedded_get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in new file mode 100644 index 0000000000..2ea5492275 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_team_size_bitshift_from_smem function for VPQ descriptor +template __device__ uint32_t get_team_size_bitshift_from_smem<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in new file mode 100644 index 0000000000..ab288d05b7 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_team_size_bitshift function for standard descriptor +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +template __device__ uint32_t get_team_size_bitshift<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@", + embedded_get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in new file mode 100644 index 0000000000..cae812ca7d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#ifdef BUILD_KERNEL + +#include + +namespace cuvs::neighbors::cagra::detail { + +// Instantiate the get_team_size_bitshift function for VPQ descriptor +template __device__ uint32_t get_team_size_bitshift<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); + +} // namespace cuvs::neighbors::cagra::detail + +#else + +#include +#include +#include "get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" + +using namespace cuvs::neighbors::cagra::detail; + +__attribute__((__constructor__)) static void register_get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +{ + registerAlgorithm( + "get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); +} + +#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in index d134ee33a1..e02cfb08a9 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -8,15 +8,16 @@ #ifdef BUILD_KERNEL #include -#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -// Instantiate the random_pickup_kernel_jit function with concrete descriptor type +// Instantiate the random_pickup_kernel_jit function with unified template parameters +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // 
Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; -template __global__ void random_pickup_kernel_jit( - const desc_t*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); +// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +template __global__ void random_pickup_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( + void*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search @@ -24,19 +25,19 @@ template __global__ void random_pickup_kernel_jit #include -#include "random_pickup_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_random_pickup_kernel_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +__attribute__((__constructor__)) static void register_random_pickup_kernel@pq_prefix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { registerAlgorithm( - "random_pickup_kernel_t@team_size@_dim@dataset_block_dim@", - embedded_random_pickup_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - 
sizeof(embedded_random_pickup_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + tag_idx_@idx_abbrev@@codebook_tag@>( + "random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", + embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in deleted file mode 100644 index 4acc085d2d..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_vpq.cu.in +++ /dev/null @@ -1,43 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include -#include - -namespace cuvs::neighbors::cagra::detail::multi_kernel_search { - -// Instantiate the random_pickup_kernel_jit function with concrete VPQ descriptor type -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; -template __global__ void random_pickup_kernel_jit( - const desc_t*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); - -} // namespace cuvs::neighbors::cagra::detail::multi_kernel_search - -#else - -#include -#include -#include "random_pickup_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_random_pickup_kernel_vpq_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "random_pickup_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_random_pickup_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_random_pickup_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index 3aae61b33b..1359101627 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -11,17 +11,16 @@ 
#include // For pickup_next_parent and topk_by_bitonic_sort_wrapper_* #include -#include -#include -#include namespace cuvs::neighbors::cagra::detail::multi_cta_search { -// Instantiate the search_kernel_jit function with concrete descriptor type +// Instantiate the search_kernel_jit function with unified template parameters +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; -template __global__ __launch_bounds__(1024, 1) void search_kernel_jit( - @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); +// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( + @index_type@* const, @distance_type@* const, void*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, 
uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::multi_cta_search @@ -29,19 +28,19 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit #include -#include "search_multi_cta_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_search_multi_cta_kernel_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() +__attribute__((__constructor__)) static void register_search_multi_cta_kernel@pq_prefix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() { registerAlgorithm( - "search_multi_cta_kernel_t@team_size@_dim@dataset_block_dim@", - embedded_search_multi_cta_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_multi_cta_kernel_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + tag_idx_@src_idx_abbrev@@codebook_tag@>( + "search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", + embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh index 5190da7725..46270421b7 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh +++ 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh @@ -42,9 +42,14 @@ using cuvs::neighbors::detail::sample_filter; using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit; using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; -// JIT version of search_kernel - uses extern functions with concrete descriptor type +// JIT version of search_kernel - uses extern functions with void* descriptor pointer +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT // Filter is linked separately via JIT LTO, so we use none_sample_filter directly -template (result_buffer_size, 32); assert(result_buffer_size_32 <= max_elements); - // Get smem_ws_size_in_bytes using static method (dim is in descriptor args) - uint32_t dim = dataset_desc->args.dim; - uint32_t smem_ws_size_in_bytes = DescriptorT::get_smem_ws_size_in_bytes(dim); - - // Set smem working buffer for the distance calculation using extern function - // setup_workspace copies the descriptor to shared memory and returns pointer to smem descriptor - // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - const DescriptorT* smem_desc = nullptr; - // Check if DescriptorT is a standard_dataset_descriptor_t by checking if it doesn't have kPqBits - // (standard descriptors don't have kPqBits, VPQ descriptors do) - if constexpr (!has_kpq_bits_v) { - // Standard descriptor - Metric is no longer a template parameter, linked via dist_op and - // normalization fragments - smem_desc = setup_workspace_standard(dataset_desc, smem, queries_ptr, query_id); - } else { - // Must be cagra_q_dataset_descriptor_t - VPQ only supports L2Expanded, so Metric is not needed - smem_desc = setup_workspace_vpq( + dataset_desc); + uint32_t smem_ws_size_in_bytes = get_smem_ws_size_in_bytes(dataset_desc, dim); + + // Set smem working buffer using unified setup_workspace + // Planner links the right fragment 
(standard or VPQ) at runtime based on descriptor type + void* smem_desc = setup_workspace(dataset_desc, smem, queries_ptr, query_id); - } auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); @@ -174,21 +174,27 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( uint32_t block_id = cta_id + (num_cta_per_query * query_id); uint32_t num_blocks = num_cta_per_query * num_queries; - compute_distance_to_random_nodes_jit( - result_indices_buffer, - result_distances_buffer, - smem_desc, - graph_degree, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - visited_hash_bitlen, - local_traversed_hashmap_ptr, - traversed_hash_bitlen, - block_id, - num_blocks); + compute_distance_to_random_nodes_jit(result_indices_buffer, + result_distances_buffer, + smem_desc, + graph_degree, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + visited_hash_bitlen, + local_traversed_hashmap_ptr, + traversed_hash_bitlen, + block_id, + num_blocks); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -268,21 +274,28 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( __syncthreads(); // Compute the norms between child nodes and query node using JIT version - compute_distance_to_child_nodes_jit( - result_indices_buffer, - result_distances_buffer, - smem_desc, - knn_graph, - graph_degree, - local_visited_hashmap_ptr, - visited_hash_bitlen, - local_traversed_hashmap_ptr, - traversed_hash_bitlen, - parent_indices_buffer, - result_indices_buffer, - 1, - result_position, - result_buffer_size_32); + compute_distance_to_child_nodes_jit(result_indices_buffer, + result_distances_buffer, + smem_desc, + knn_graph, + graph_degree, + local_visited_hashmap_ptr, + visited_hash_bitlen, + local_traversed_hashmap_ptr, + traversed_hash_bitlen, + parent_indices_buffer, + result_indices_buffer, + 1, + result_position, + result_buffer_size_32); __syncthreads(); // 
Check the state of the nodes in the result buffer which were not updated diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in deleted file mode 100644 index 7d41a6778f..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_vpq.cu.in +++ /dev/null @@ -1,47 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -// Include helpers first so they're available when the JIT kernel is instantiated -#include // For pickup_next_parent and topk_by_bitonic_sort_wrapper_* - -#include -#include -#include - -namespace cuvs::neighbors::cagra::detail::multi_cta_search { - -// Instantiate the search_kernel_jit function with concrete VPQ descriptor type -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; -template __global__ __launch_bounds__(1024, 1) void search_kernel_jit( - @index_type@* const, @distance_type@* const, const desc_t*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); - -} // namespace cuvs::neighbors::cagra::detail::multi_cta_search - -#else - -#include -#include -#include 
"search_multi_cta_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_search_multi_cta_kernel_vpq_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() -{ - registerAlgorithm( - "search_multi_cta_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_search_multi_cta_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_multi_cta_kernel_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index 188a5e481d..c4d498827f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -150,6 +150,80 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { this->device_functions.push_back("sample_filter_" + filter_name); } + void add_descriptor_accessor_device_functions(uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + { + // Register all descriptor accessor fragments (get_dim, get_size, get_team_size_bitshift, + // get_args, get_smem_ws_size_in_bytes) + // These fragments allow kernels to access descriptor members via void* pointers + if (is_vpq) { + using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; + auto params = make_fragment_key(); + std::string base = "get_dim_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + 
std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_size_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + // Multi CTA kernels only use shared memory descriptors (after setup_workspace), + // so we only need get_team_size_bitshift_from_smem + base = "get_team_size_bitshift_from_smem_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_args_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_smem_ws_size_in_bytes_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + } else { + auto params = make_fragment_key(); + std::string base = "get_dim_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_size_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + // Multi CTA kernels only use shared memory descriptors (after setup_workspace), + // so we only need get_team_size_bitshift_from_smem + base = "get_team_size_bitshift_from_smem_standard_t" + std::to_string(team_size); + base += "_dim" + 
std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_args_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_smem_ws_size_in_bytes_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + } + } + private: std::string entrypoint_name_; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh index b6082823ee..3a4ccde06d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh @@ -36,67 +36,72 @@ struct has_kpq_bits { template inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; -// JIT version of random_pickup_kernel - uses extern functions -template RAFT_KERNEL random_pickup_kernel_jit( - const DescriptorT* dataset_desc, // Concrete descriptor type from template - const typename DescriptorT::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + void* dataset_desc, // void* descriptor pointer (reconstructed in fragments) + const DataT* const queries_ptr, // [num_queries, dataset_dim] const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, - const typename DescriptorT::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const IndexT* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, - typename DescriptorT::INDEX_T* const result_indices_ptr, // [num_queries, ldr] - typename DescriptorT::DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] - const std::uint32_t ldr, // (*) ldr >= num_pickup - typename DescriptorT::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] + IndexT* const 
result_indices_ptr, // [num_queries, ldr] + DistanceT* const result_distances_ptr, // [num_queries, ldr] + const std::uint32_t ldr, // (*) ldr >= num_pickup + IndexT* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] const std::uint32_t hash_bitlen) { - using DATA_T = typename DescriptorT::DATA_T; - using INDEX_T = typename DescriptorT::INDEX_T; - using DISTANCE_T = typename DescriptorT::DISTANCE_T; + using DATA_T = DataT; + using INDEX_T = IndexT; + using DISTANCE_T = DistanceT; + + // Get team_size_bits using accessor fragment (planner links the right fragment at runtime) + uint32_t team_size_bits = get_team_size_bitshift(dataset_desc); - const auto team_size_bits = dataset_desc->team_size_bitshift(); const auto ldb = hashmap::get_size(hash_bitlen); const auto global_team_index = (blockIdx.x * blockDim.x + threadIdx.x) >> team_size_bits; const uint32_t query_id = blockIdx.y; if (global_team_index >= num_pickup) { return; } extern __shared__ uint8_t smem[]; - // Set smem working buffer for the distance calculation using extern function - // setup_workspace copies the descriptor to shared memory and returns pointer to smem descriptor + // Set smem working buffer using unified setup_workspace + // setup_workspace copies the descriptor to shared memory and returns void* to smem descriptor // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - const DescriptorT* smem_desc = nullptr; - // Check if DescriptorT is a standard_dataset_descriptor_t by checking if it doesn't have kPqBits - // (standard descriptors don't have kPqBits, VPQ descriptors do) - if constexpr (!has_kpq_bits_v) { - // Standard descriptor - Metric is no longer a template parameter, linked via dist_op and - // normalization fragments - smem_desc = setup_workspace_standard(dataset_desc, smem, queries_ptr, query_id); - } else { - // Must be cagra_q_dataset_descriptor_t - VPQ only supports L2Expanded, so Metric is not needed - smem_desc = 
setup_workspace_vpq(dataset_desc, smem, queries_ptr, query_id); - } __syncthreads(); // Load args once for better performance (avoid repeated loads in the loop) using args_t = typename cuvs::neighbors::cagra::detail:: dataset_descriptor_base_t::args_t; - const args_t args = smem_desc->args.load(); + args_t args = + get_args( + smem_desc); + IndexT dataset_size = + get_size( + smem_desc); INDEX_T best_index_team_local; DISTANCE_T best_norm2_team_local = utils::get_max_value(); @@ -106,34 +111,22 @@ RAFT_KERNEL random_pickup_kernel_jit( seed_index = seed_ptr[global_team_index + (num_seeds * query_id)]; } else { // Chose a seed node randomly - seed_index = - device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % smem_desc->size; + seed_index = device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_size; } // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum - // Otherwise warp shuffles will hang. Each thread calls the extern function to get + // Otherwise warp shuffles will hang. Each thread calls the unified extern function to get // its per-thread distance, then team_sum reduces across all threads in the team. 
DistanceT per_thread_norm2 = 0; - if constexpr (!has_kpq_bits_v) { - // Standard descriptor - Metric is no longer a template parameter, linked via dist_op and - // normalization fragments - per_thread_norm2 = compute_distance_standard(args, seed_index); - } else { - // Must be cagra_q_dataset_descriptor_t - VPQ only supports L2Expanded, so Metric is not - // needed - per_thread_norm2 = compute_distance_vpq(args, seed_index); - } + // Use unified compute_distance function (planner links standard or VPQ fragment at runtime) + per_thread_norm2 = compute_distance(args, seed_index); // Now ALL threads in the team participate in team_sum const auto norm2 = device::team_sum(per_thread_norm2, team_size_bits); @@ -156,33 +149,49 @@ RAFT_KERNEL random_pickup_kernel_jit( } } -// JIT version of compute_distance_to_child_nodes_kernel - uses extern functions -template RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( - const typename DescriptorT::INDEX_T* const parent_node_list, // [num_queries, search_width] - typename DescriptorT::INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] - typename DescriptorT::DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const IndexT* const parent_node_list, // [num_queries, search_width] + IndexT* const parent_candidates_ptr, // [num_queries, search_width] + DistanceT* const parent_distance_ptr, // [num_queries, search_width] const std::size_t lds, const std::uint32_t search_width, - const DescriptorT* dataset_desc, // Concrete descriptor type from template - const typename DescriptorT::INDEX_T* const neighbor_graph_ptr, // [dataset_size, graph_degree] + void* dataset_desc, // void* descriptor pointer (reconstructed in fragments) + const IndexT* const neighbor_graph_ptr, // [dataset_size, graph_degree] const std::uint32_t graph_degree, const SourceIndexT* source_indices_ptr, - const typename DescriptorT::DATA_T* query_ptr, // [num_queries, data_dim] - typename DescriptorT::INDEX_T* const 
visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + const DataT* query_ptr, // [num_queries, data_dim] + IndexT* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t hash_bitlen, - typename DescriptorT::INDEX_T* const result_indices_ptr, // [num_queries, ldd] - typename DescriptorT::DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + IndexT* const result_indices_ptr, // [num_queries, ldd] + DistanceT* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree SAMPLE_FILTER_T sample_filter) { - using INDEX_T = typename DescriptorT::INDEX_T; - using DISTANCE_T = typename DescriptorT::DISTANCE_T; - using DataT = typename DescriptorT::DATA_T; + using INDEX_T = IndexT; + using DISTANCE_T = DistanceT; + + // Get team_size_bits using accessor fragment (planner links the right fragment at runtime) + uint32_t team_size_bits = get_team_size_bitshift(dataset_desc); - const auto team_size_bits = dataset_desc->team_size_bitshift(); const auto team_size = 1u << team_size_bits; const uint32_t ldb = hashmap::get_size(hash_bitlen); const auto tid = threadIdx.x + blockDim.x * blockIdx.x; @@ -190,29 +199,17 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( const auto query_id = blockIdx.y; extern __shared__ uint8_t smem[]; - // Load a query using extern function - // setup_workspace copies the descriptor to shared memory and returns pointer to smem descriptor + // Load a query using unified setup_workspace + // setup_workspace copies the descriptor to shared memory and returns void* to smem descriptor // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - const DescriptorT* smem_desc = nullptr; - if constexpr (!has_kpq_bits_v) { - // Standard descriptor - Metric is no longer a template parameter, linked via dist_op and - // normalization fragments - smem_desc = 
setup_workspace_standard(dataset_desc, smem, query_ptr, query_id); - } else { - // Must be cagra_q_dataset_descriptor_t - VPQ only supports L2Expanded, so Metric is not needed - smem_desc = setup_workspace_vpq(dataset_desc, smem, query_ptr, query_id); - } + IndexT, + DistanceT>(dataset_desc, smem, query_ptr, query_id); __syncthreads(); if (global_team_id >= search_width * graph_degree) { return; } @@ -241,33 +238,24 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( // Load args once for better performance (avoid repeated loads) using args_t = typename cuvs::neighbors::cagra::detail:: dataset_descriptor_base_t::args_t; - const args_t args = smem_desc->args.load(); + args_t args = + get_args( + smem_desc); // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum - // Otherwise warp shuffles will hang. Each thread calls the extern function to get + // Otherwise warp shuffles will hang. Each thread calls the unified extern function to get // its per-thread distance, then team_sum reduces across all threads in the team. 
DISTANCE_T per_thread_norm2 = 0; if (compute_distance_flag) { - if constexpr (!has_kpq_bits_v) { - // Standard descriptor - Metric is no longer a template parameter, linked via dist_op and - // normalization fragments - per_thread_norm2 = compute_distance_standard(args, child_id); - } else { - // Must be cagra_q_dataset_descriptor_t - VPQ only supports L2Expanded, so Metric is not - // needed - per_thread_norm2 = compute_distance_vpq(args, child_id); - } + // Use unified compute_distance function (planner links standard or VPQ fragment at runtime) + per_thread_norm2 = compute_distance(args, child_id); } // Now ALL threads in the team participate in team_sum DISTANCE_T norm2 = device::team_sum(per_thread_norm2, team_size_bits); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index ba44a5d9cf..c7641a9bd3 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -153,6 +153,87 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { this->device_functions.push_back("sample_filter_" + filter_name); } + void add_descriptor_accessor_device_functions(uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + { + // Register all descriptor accessor fragments (get_dim, get_size, get_team_size_bitshift, + // get_args, get_smem_ws_size_in_bytes) + // These fragments allow kernels to access descriptor members via void* pointers + if (is_vpq) { + using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; + auto params = make_fragment_key(); + std::string base = "get_dim_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + 
this->device_functions.push_back(base); + + base = "get_size_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_team_size_bitshift_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_team_size_bitshift_from_smem_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_args_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_smem_ws_size_in_bytes_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + } else { + auto params = make_fragment_key(); + std::string base = "get_dim_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_size_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_team_size_bitshift_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + 
this->device_functions.push_back(base); + + base = "get_team_size_bitshift_from_smem_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_args_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_smem_ws_size_in_bytes_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + } + } + void set_entrypoint_name(const std::string& name) { entrypoint_name_ = name; } private: diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in index f1c99ee698..7afbb5400f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -8,15 +8,16 @@ #ifdef BUILD_KERNEL #include -#include namespace cuvs::neighbors::cagra::detail::single_cta_search { -// Instantiate the search_kernel_jit function with concrete descriptor type +// Instantiate the search_kernel_jit function with unified template parameters +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; -template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - uintptr_t, 
@distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); +// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( + uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, void*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search @@ -24,7 +25,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_b #include #include -#include "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include 
"search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -33,10 +34,10 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_@ registerAlgorithm( - "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@", - embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + tag_idx_@src_idx_abbrev@@codebook_tag@>( + "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", + embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh index 8e30192605..f500ef8eeb 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -84,10 +84,17 @@ using cuvs::neighbors::detail::sample_filter; using 
cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit; using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; -// JIT version of search_core - uses extern functions with descriptor pointer +// JIT version of search_core - uses extern functions with void* descriptor pointer +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half template (result_buffer_size, 32); const auto small_hash_size = hashmap::get_size(small_hash_bitlen); - // Get smem_ws_size_in_bytes using static method (dim is in descriptor args) - uint32_t dim = dataset_desc->args.dim; - uint32_t smem_ws_size_in_bytes = DescriptorT::get_smem_ws_size_in_bytes(dim); - - // Set smem working buffer for the distance calculation using extern function - // setup_workspace copies the descriptor to shared memory and returns pointer to smem descriptor + // Get dim using accessor fragment (reconstructs descriptor from void*) + // Planner links the right fragment (standard or VPQ) at runtime based on descriptor type + uint32_t dim = + get_dim( + dataset_desc); + uint32_t smem_ws_size_in_bytes = get_smem_ws_size_in_bytes(dataset_desc, dim); + + // Set smem working buffer using unified setup_workspace + // setup_workspace copies the descriptor to shared memory and returns void* to smem descriptor // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - const DescriptorT* smem_desc = nullptr; - // Check if DescriptorT is a standard_dataset_descriptor_t by checking if it doesn't have kPqBits - // (standard descriptors don't have kPqBits, VPQ descriptors do) - if constexpr (!has_kpq_bits_v) { - // Standard descriptor - Metric is no longer a template parameter, linked via dist_op and - // normalization fragments - smem_desc = setup_workspace_standard(dataset_desc, smem, queries_ptr, 
query_id); - } else { - // Must be cagra_q_dataset_descriptor_t - VPQ only supports L2Expanded, so Metric is not needed - smem_desc = setup_workspace_vpq(dataset_desc, smem, queries_ptr, query_id); - } auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); @@ -216,20 +220,29 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( // compute distance to randomly selecting nodes using JIT version _CLK_START(); const IndexT* const local_seed_ptr = seed_ptr ? seed_ptr + (num_seeds * query_id) : nullptr; - IndexT dataset_size = smem_desc->size; - compute_distance_to_random_nodes_jit( - result_indices_buffer, - result_distances_buffer, - smem_desc, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen, - (IndexT*)nullptr, - 0); + // Get dataset_size using accessor fragment (planner links the right fragment at runtime) + IndexT dataset_size = + get_size( + smem_desc); + compute_distance_to_random_nodes_jit(result_indices_buffer, + result_distances_buffer, + smem_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -337,19 +350,25 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( __syncthreads(); // compute the norms between child nodes and query node using JIT version _CLK_START(); - compute_distance_to_child_nodes_jit( - result_indices_buffer + internal_topk, - result_distances_buffer + internal_topk, - smem_desc, - knn_graph, - graph_degree, - local_visited_hashmap_ptr, - hash_bitlen, - (IndexT*)nullptr, - 0, - parent_list_buffer, - result_indices_buffer, - search_width); + compute_distance_to_child_nodes_jit(result_indices_buffer + internal_topk, + result_distances_buffer + internal_topk, + smem_desc, + knn_graph, + graph_degree, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0, + 
parent_list_buffer, + result_indices_buffer, + search_width); // Critical: __syncthreads() must be reached by ALL threads // If any thread is stuck in compute_distance_to_child_nodes_jit, this will hang __syncthreads(); @@ -518,9 +537,14 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( } // JIT kernel wrapper - calls search_core +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT template -#include namespace cuvs::neighbors::cagra::detail::single_cta_search { -// Instantiate the search_kernel_p_jit function with concrete descriptor type +// Instantiate the search_kernel_p_jit function with unified template parameters +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -using desc_t = cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>; -template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); +// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, 
@bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( + worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, void*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search @@ -24,7 +25,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by #include #include -#include "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -33,10 +34,10 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_p registerAlgorithm( - "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@", - embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + 
tag_idx_@src_idx_abbrev@@codebook_tag@>( + "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", + embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in deleted file mode 100644 index c708b15920..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_vpq.cu.in +++ /dev/null @@ -1,43 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include -#include - -namespace cuvs::neighbors::cagra::detail::single_cta_search { - -// Instantiate the search_kernel_p_jit function with concrete VPQ descriptor type -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; -template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); - -} // namespace cuvs::neighbors::cagra::detail::single_cta_search - -#else - -#include -#include -#include "search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() -{ - registerAlgorithm( - "search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - 
embedded_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_single_cta_kernel_p_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in deleted file mode 100644 index 286b2b29b3..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_vpq.cu.in +++ /dev/null @@ -1,43 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include -#include - -namespace cuvs::neighbors::cagra::detail::single_cta_search { - -// Instantiate the search_kernel_jit function with concrete VPQ descriptor type -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; -template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, desc_t, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const desc_t*, uint32_t*, @source_index_type@, @source_index_type@); - -} // namespace cuvs::neighbors::cagra::detail::single_cta_search - -#else - -#include -#include -#include "search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() -{ - registerAlgorithm( - 
"search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_single_cta_kernel_vpq_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index d2445bdcca..bfd484a957 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -168,6 +168,80 @@ struct CagraSearchPlanner : AlgorithmPlanner { this->device_functions.push_back("sample_filter_" + filter_name); } + void add_descriptor_accessor_device_functions(uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + { + // Register all descriptor accessor fragments (get_dim, get_size, get_team_size_bitshift, + // get_args, get_smem_ws_size_in_bytes) + // These fragments allow kernels to access descriptor members via void* pointers + if (is_vpq) { + using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; + auto params = make_fragment_key(); + std::string base = "get_dim_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_size_vpq_t" + std::to_string(team_size); + base += "_dim" + 
std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + // Single CTA kernels only use shared memory descriptors (after setup_workspace), + // so we only need get_team_size_bitshift_from_smem + base = "get_team_size_bitshift_from_smem_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_args_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_smem_ws_size_in_bytes_vpq_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + base += "_" + params; + this->device_functions.push_back(base); + } else { + auto params = make_fragment_key(); + std::string base = "get_dim_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_size_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + // Single CTA kernels only use shared memory descriptors (after setup_workspace), + // so we only need get_team_size_bitshift_from_smem + base = "get_team_size_bitshift_from_smem_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_args_standard_t" + std::to_string(team_size); + base += "_dim" + 
std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + + base = "get_smem_ws_size_in_bytes_standard_t" + std::to_string(team_size); + base += "_dim" + std::to_string(dataset_block_dim); + base += "_" + params; + this->device_functions.push_back(base); + } + } + private: static std::string build_entrypoint_name(cuvs::distance::DistanceType metric, bool topk_by_bitonic_sort, @@ -180,10 +254,7 @@ struct CagraSearchPlanner : AlgorithmPlanner { bool persistent) { std::string name = (persistent ? "search_single_cta_kernel_p_" : "search_single_cta_kernel_"); - if (is_vpq) { - name += "vpq_"; - // Note: Metric is no longer in VPQ kernel names - VPQ only supports L2Expanded - } + // Note: "vpq" is no longer in the name - PQ parameters distinguish VPQ from standard name += bool_to_string(topk_by_bitonic_sort) + "_"; name += bool_to_string(bitonic_sort_and_merge_multi_warps) + "_"; // Note: Metric is no longer in kernel names - it's linked via dist_op and normalization diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh new file mode 100644 index 0000000000..c40400ed95 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh @@ -0,0 +1,28 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../device_common.hpp" + +namespace cuvs::neighbors::cagra::detail { + +// Unified setup_workspace function - takes void* and template parameters +// Standard and VPQ versions are in separate impl headers but use the same function name +// The planner links the appropriate fragment at runtime based on PQ_BITS/PQ_LEN +template +extern __device__ void* setup_workspace(void* desc_ptr, + void* smem, + const DataT* queries, + uint32_t query_id); + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in index 7afb69cb1e..204bd4e285 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in @@ -7,13 +7,14 @@ #ifdef BUILD_KERNEL -#include +#include namespace cuvs::neighbors::cagra::detail { -// Instantiate the setup_workspace_standard function for standard descriptor -// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -template __device__ const cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>* setup_workspace_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>(const cuvs::neighbors::cagra::detail::standard_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); +// Instantiate the unified setup_workspace function for standard descriptor +// PQ_BITS=0, PQ_LEN=0, CodebookT=void for standard descriptors +template __device__ void* setup_workspace<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>( + void*, void*, const @data_type@*, 
uint32_t); } // namespace cuvs::neighbors::cagra::detail @@ -27,6 +28,8 @@ using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { + // This fragment provides setup_workspace (standard version) + // The planner links the appropriate fragment (standard or VPQ) based on descriptor type registerAlgorithm( diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh new file mode 100644 index 0000000000..cc43e12b50 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh @@ -0,0 +1,43 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../compute_distance_standard-impl.cuh" +#include "../device_common.hpp" +#include "setup_workspace_impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +// Unified setup_workspace implementation for standard descriptors +// This is instantiated when PQ_BITS=0, PQ_LEN=0, CodebookT=void +// Takes void* and reconstructs the descriptor inside +template +__device__ void* setup_workspace(void* desc_ptr, + void* smem, + const DataT* queries, + uint32_t query_id) +{ + // For standard descriptors, PQ_BITS=0, PQ_LEN=0, CodebookT=void + static_assert(PQ_BITS == 0 && PQ_LEN == 0 && std::is_same_v, + "Standard descriptor requires PQ_BITS=0, PQ_LEN=0, CodebookT=void"); + + // Reconstruct the descriptor pointer from void* + using desc_t = standard_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + + // Call the free function directly - it takes DescriptorT as template parameter + const desc_t* result = setup_workspace_standard(desc, smem, queries, query_id); + return const_cast(result); +} + +} 
// namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in index dc87a44e02..394a86992c 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in @@ -7,13 +7,14 @@ #ifdef BUILD_KERNEL -#include +#include namespace cuvs::neighbors::cagra::detail { -// Instantiate the setup_workspace_vpq function for VPQ descriptor -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded -template __device__ const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>* setup_workspace_vpq<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); +// Instantiate the unified setup_workspace function for VPQ descriptor +// PQ_BITS>0, PQ_LEN>0, CodebookT=half for VPQ descriptors +template __device__ void* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( + void*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail @@ -27,6 +28,8 @@ using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { + // This fragment provides setup_workspace (VPQ version) + // The planner links the appropriate fragment (standard or VPQ) based on descriptor type 
registerAlgorithm0, PQ_LEN>0, CodebookT=half +// Takes void* and reconstructs the descriptor inside +template +__device__ void* setup_workspace(void* desc_ptr, + void* smem, + const DataT* queries, + uint32_t query_id) +{ + // For VPQ descriptors, PQ_BITS>0, PQ_LEN>0, CodebookT=half + static_assert(PQ_BITS > 0 && PQ_LEN > 0 && std::is_same_v, + "VPQ descriptor requires PQ_BITS>0, PQ_LEN>0, CodebookT=half"); + + // Reconstruct the descriptor pointer from void* + using desc_t = cagra_q_dataset_descriptor_t; + const desc_t* desc = reinterpret_cast(desc_ptr); + + // Call the free function directly - it takes DescriptorT as template parameter + const desc_t* result = setup_workspace_vpq(desc, smem, queries, query_id); + return const_cast(result); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 1811a60eb5..8b96f3834d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -130,6 +130,12 @@ void select_and_run_jit( dataset_desc.pq_bits, dataset_desc.pq_len); + // Register descriptor accessor fragments first (needed for void* descriptor access) + planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -248,6 +254,7 @@ void select_and_run_jit( graph.extent(0), graph.extent(1)); + RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_CTA launching kernel on stream=%p", stream); launcher->dispatch(stream, grid_dims, block_dims, diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index 7ae4d11cd5..5efcf3a557 
100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -888,6 +888,14 @@ struct search } // Choose initial entry point candidates at random + // Validate all pointers before kernel launch to prevent illegal memory access + if (result_indices.data() == nullptr) { + RAFT_FAIL("result_indices.data() is NULL before random_pickup"); + } + if (result_distances.data() == nullptr) { + RAFT_FAIL("result_distances.data() is NULL before random_pickup"); + } + if (hashmap.data() == nullptr) { RAFT_FAIL("hashmap.data() is NULL before random_pickup"); } random_pickup(dataset_desc, queries_ptr, num_queries, @@ -1085,7 +1093,6 @@ struct search num_executed_iterations[i] = iter; } } - RAFT_CUDA_TRY(cudaPeekAtLastError()); } }; diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh index 86220dc6ae..c5d36a6bc9 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -69,6 +69,12 @@ void random_pickup_jit(const dataset_descriptor_host& dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); + // Register descriptor accessor fragments first (needed for void* descriptor access) + planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -90,12 +96,31 @@ void random_pickup_jit(const dataset_descriptor_host& // Get the device descriptor pointer const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); + if (dev_desc == nullptr) { + RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) dev_desc is NULL"); + } + + // Validate all pointers before kernel launch to prevent 
illegal memory access + if (queries_ptr == nullptr) { + RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) queries_ptr is NULL"); + } + if (result_indices_ptr == nullptr) { + RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) result_indices_ptr is NULL"); + } + if (result_distances_ptr == nullptr) { + RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) result_distances_ptr is NULL"); + } + if (visited_hashmap_ptr == nullptr) { + RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) visited_hashmap_ptr is NULL"); + } // Cast size_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly const uint32_t ldr_u32 = static_cast(ldr); // Dispatch kernel via launcher + RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) launching kernel on stream=%p", + cuda_stream); launcher->dispatch(cuda_stream, grid_size, dim3(block_size, 1, 1), @@ -189,6 +214,12 @@ void compute_distance_to_child_nodes_jit( dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); + // Register descriptor accessor fragments first (needed for void* descriptor access) + planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 54e51d32ec..34a9f11807 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -666,6 +666,12 @@ void select_and_run_jit( true /* persistent */); // Add device functions + // Register descriptor accessor fragments first (needed for void* descriptor access) + 
planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -728,6 +734,12 @@ void select_and_run_jit( dataset_desc.pq_len); // Add device functions (tags are determined inside the planner methods) + // Register descriptor accessor fragments first (needed for void* descriptor access) + planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, From 0e201e8a2d3e92ee4fa7da05b91f19ee8c26b336 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 19 Feb 2026 19:07:06 +0000 Subject: [PATCH 111/158] attempt to fix cuda 12 builds --- .../cagra/compute_distance_vpq-impl.cuh | 14 +++++- .../detail/cagra/search_multi_cta.cuh | 16 ++++--- .../detail/cagra/search_multi_cta_inst.cuh | 45 ++++++++++--------- .../detail/cagra/search_multi_kernel.cuh | 40 ++++++++--------- 4 files changed, 64 insertions(+), 51 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh index 004aaf39b6..b51ae024e8 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh @@ -384,7 +384,12 @@ RAFT_KERNEL __launch_bounds__(1, 1) CodebookT, DataT, IndexT, - DistanceT>; + DistanceT +#if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) + , + Metric +#endif + >; using base_type = typename desc_type::base_type; #ifdef CUVS_ENABLE_JIT_LTO // For JIT, we don't use the function pointers, so set them to nullptr @@ -449,7 +454,12 @@ vpq_descriptor_spec; + 
DistanceT +#if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) + , + Metric +#endif + >; using base_type = typename desc_type::base_type; return host_type{desc_type{nullptr, diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh index 74822c8660..f3871a55f3 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -30,6 +30,8 @@ #include #include // RAFT_CUDA_TRY_NOT_THROW is used TODO(tfeher): consider moving this to cuda_rt_essentials.hpp +#include + #include #include #include @@ -91,10 +93,10 @@ struct search constexpr static bool kNeedIndexCopy = sizeof(INDEX_T) != sizeof(OutputIndexT); uint32_t num_cta_per_query; - lightweight_uvector intermediate_indices; - lightweight_uvector intermediate_distances; + rmm::device_uvector intermediate_indices; + rmm::device_uvector intermediate_distances; size_t topk_workspace_size; - lightweight_uvector topk_workspace; + rmm::device_uvector topk_workspace; search(raft::resources const& res, search_params params, @@ -104,9 +106,9 @@ struct search int64_t graph_degree, uint32_t topk) : base_type(res, params, dataset_desc, dim, dataset_size, graph_degree, topk), - intermediate_indices(res), - intermediate_distances(res), - topk_workspace(res) + intermediate_indices(0, raft::resource::get_cuda_stream(res)), + intermediate_distances(0, raft::resource::get_cuda_stream(res)), + topk_workspace(0, raft::resource::get_cuda_stream(res)) { set_params(res, params); } diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh index fe4b7a3720..834b7b21ee 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh @@ -6,33 +6,34 @@ #pragma once #include "../../sample_filter.cuh" +#include "sample_filter_utils.cuh" #include "search_multi_cta_kernel-inl.cuh" #include namespace cuvs::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(DataT, IndexT, DistanceT, SampleFilterT) \ - template void select_and_run( \ - const dataset_descriptor_host& dataset_desc, \ - raft::device_matrix_view graph, \ - const IndexT* source_indices_ptr, \ - uint32_t* topk_indices_ptr, \ - DistanceT* topk_distances_ptr, \ - const DataT* queries_ptr, \ - uint32_t num_queries, \ - const uint32_t* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - const search_params& ps, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - uint32_t small_hash_bitlen, \ - int64_t hash_bitlen, \ - uint32_t* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_seeds, \ - SampleFilterT sample_filter, \ +#define instantiate_kernel_selection(DataT, IndexT, DistanceT, SampleFilterT) \ + template void select_and_run( \ + const dataset_descriptor_host& dataset_desc, \ + raft::device_matrix_view graph, \ + const IndexT* source_indices_ptr, \ + IndexT* topk_indices_ptr, \ + DistanceT* topk_distances_ptr, \ + const DataT* queries_ptr, \ + uint32_t num_queries, \ + const IndexT* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + const search_params& ps, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + uint32_t small_hash_bitlen, \ + int64_t hash_bitlen, \ + IndexT* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_seeds, \ + SampleFilterT sample_filter, \ cudaStream_t stream); } // namespace cuvs::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh 
b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index 5efcf3a557..025827c112 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -701,18 +701,18 @@ struct search using base_type::num_seeds; size_t result_buffer_allocation_size; - lightweight_uvector result_indices; // results_indices_buffer - lightweight_uvector result_distances; // result_distances_buffer - lightweight_uvector parent_node_list; - lightweight_uvector topk_hint; - lightweight_uvector terminate_flag; // dev_terminate_flag, host_terminate_flag.; - lightweight_uvector topk_workspace; + rmm::device_uvector result_indices; // results_indices_buffer + rmm::device_uvector result_distances; // result_distances_buffer + rmm::device_uvector parent_node_list; + rmm::device_uvector topk_hint; + rmm::device_uvector terminate_flag; // dev_terminate_flag, host_terminate_flag.; + rmm::device_uvector topk_workspace; // temporary storage for _find_topk - lightweight_uvector input_keys_storage; - lightweight_uvector output_keys_storage; - lightweight_uvector input_values_storage; - lightweight_uvector output_values_storage; + rmm::device_uvector input_keys_storage; + rmm::device_uvector output_keys_storage; + rmm::device_uvector input_values_storage; + rmm::device_uvector output_values_storage; search(raft::resources const& res, search_params params, @@ -722,16 +722,16 @@ struct search int64_t graph_degree, uint32_t topk) : base_type(res, params, dataset_desc, dim, dataset_size, graph_degree, topk), - result_indices(res), - result_distances(res), - parent_node_list(res), - topk_hint(res), - topk_workspace(res), - terminate_flag(res), - input_keys_storage(res), - output_keys_storage(res), - input_values_storage(res), - output_values_storage(res) + result_indices(0, raft::resource::get_cuda_stream(res)), + result_distances(0, raft::resource::get_cuda_stream(res)), + parent_node_list(0, raft::resource::get_cuda_stream(res)), + 
topk_hint(0, raft::resource::get_cuda_stream(res)), + topk_workspace(0, raft::resource::get_cuda_stream(res)), + terminate_flag(0, raft::resource::get_cuda_stream(res)), + input_keys_storage(0, raft::resource::get_cuda_stream(res)), + output_keys_storage(0, raft::resource::get_cuda_stream(res)), + input_values_storage(0, raft::resource::get_cuda_stream(res)), + output_values_storage(0, raft::resource::get_cuda_stream(res)) { set_params(res); } From 88a4b6ee3d0b9f220f8a100ff64d780d26d657b1 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 19 Feb 2026 19:21:42 +0000 Subject: [PATCH 112/158] respond to reviews --- .../jit_lto/NVRTCLTOFragmentCompiler.hpp | 2 + cpp/include/cuvs/neighbors/ivf_flat.hpp | 49 ++++++++++--------- .../jit_lto/NVRTCLTOFragmentCompiler.cu | 21 ++++---- .../ivf_flat_interleaved_scan_jit.cuh | 8 +-- 4 files changed, 43 insertions(+), 37 deletions(-) diff --git a/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp b/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp index 3c14fbd425..a1f598b6a5 100644 --- a/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp +++ b/cpp/include/cuvs/detail/jit_lto/NVRTCLTOFragmentCompiler.hpp @@ -15,3 +15,5 @@ struct NVRTCLTOFragmentCompiler { std::vector standard_compile_opts; }; + +NVRTCLTOFragmentCompiler& nvrtc_compiler(); diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index f5b12b7445..ccb92eb594 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -3053,7 +3053,7 @@ void recompute_internal_state(const raft::resources& res, index inline constexpr bool is_same_v = is_same inline constexpr bool is_same_v = is_same \ - struct NAME : cuvs::neighbors::ivf_flat::udf::metric_interface { \ - using point_type = cuvs::neighbors::ivf_flat::udf::point; \ - __device__ void operator()(AccT& acc, point_type x, point_type y) override { BODY } \ - }; \ - \ - inline std::string NAME##_udf() \ - { \ - 
using namespace cuvs::neighbors::ivf_flat::udf; \ - std::string result; \ - result += jit_preamble_code; \ - result += point_code; \ - result += squared_diff_code; \ - result += abs_diff_code; \ - result += dot_product_code; \ - result += product_code; \ - result += sum_code; \ - result += max_elem_code; \ - result += metric_interface_code; \ +#define CUVS_METRIC(NAME, BODY) \ + template \ + struct NAME : cuvs::neighbors::ivf_flat::experimental::udf::metric_interface { \ + using point_type = cuvs::neighbors::ivf_flat::experimental::udf::point; \ + __device__ void operator()(AccT& acc, point_type x, point_type y) override { BODY } \ + }; \ + \ + inline std::string NAME##_udf() \ + { \ + using namespace cuvs::neighbors::ivf_flat::experimental::udf; \ + std::string result; \ + result += jit_preamble_code; \ + result += point_code; \ + result += squared_diff_code; \ + result += abs_diff_code; \ + result += dot_product_code; \ + result += product_code; \ + result += sum_code; \ + result += max_elem_code; \ + result += metric_interface_code; \ result += R"( \ template \ struct )" #NAME R"( : metric_interface { \ @@ -3464,11 +3465,11 @@ __device__ __forceinline__ void compute_dist(AccT& acc, AccT x, AccT y) metric(acc, ::point(x), ::point(y)); \ } \ }}}} \ -)"; \ - return result; \ +)"; \ + return result; \ } -} // namespace udf +} // namespace experimental::udf #endif } // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index bc4e1aa4cc..99c1de13f8 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -11,15 +11,12 @@ #include "cuda.h" #include -#define NVRTC_SAFE_CALL(_call) \ - do { \ - nvrtcResult result = _call; \ - if (result != NVRTC_SUCCESS) { \ - std::cerr << "\nerror: " #_call " failed with error " << nvrtcGetErrorString(result) \ - << '\n'; \ - exit(1); \ - } \ - } while (0) +#define 
NVRTC_SAFE_CALL(_call) \ + { \ + nvrtcResult result = _call; \ + RAFT_EXPECTS(result == NVRTC_SUCCESS, \ + "nvrtc error: " + std::string(nvrtcGetErrorString(result))); \ + } NVRTCLTOFragmentCompiler::NVRTCLTOFragmentCompiler() { @@ -87,3 +84,9 @@ void NVRTCLTOFragmentCompiler::compile(std::string const& key, std::string const registerNVRTCFragment(key, std::move(program), ltoIRSize); } + +NVRTCLTOFragmentCompiler& nvrtc_compiler() +{ + static NVRTCLTOFragmentCompiler compiler; + return compiler; +} diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index 8dc735f03e..1303463f37 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -186,13 +186,13 @@ void launch_kernel(const index& index, metric_udf += type_name(); metric_udf += ");\n"; // Include hash of UDF source in key to differentiate different UDFs - auto udf_hash = std::to_string(std::hash{}(metric_udf)); - std::string metric_name = "metric_udf_" + udf_hash; - NVRTCLTOFragmentCompiler nrtc_lto_compiler; + auto udf_hash = std::to_string(std::hash{}(metric_udf)); + std::string metric_name = "metric_udf_" + udf_hash; + auto& nvrtc_lto_compiler = nvrtc_compiler(); std::string key = metric_name + "_" + std::to_string(Veclen) + "_" + make_fragment_key()), decltype(get_acc_type_tag())>(); - nrtc_lto_compiler.compile(key, metric_udf); + nvrtc_lto_compiler.compile(key, metric_udf); kernel_planner.template add_metric_device_function()), decltype(get_acc_type_tag())>( metric_name, Veclen); From 63c73009ffa2cc046fcd6152437a956d9b6ebc69 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 19 Feb 2026 19:25:03 +0000 Subject: [PATCH 113/158] pin cupy to <14.0 for cuda 12 wheels --- dependencies.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index a725499ec4..67b3d9b3df 100644 --- 
a/dependencies.yaml +++ b/dependencies.yaml @@ -372,7 +372,7 @@ dependencies: - matrix: cuda: "12.*" packages: - - cupy-cuda12x>=13.6.0 + - cupy-cuda12x>=13.6.0,<14.0 # fallback to CUDA 13 versions if 'cuda' is '13.*' or not provided - matrix: packages: From 0c0b6b5a72be233cb22f16c1f329236b9a8a2cca Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 19 Feb 2026 20:31:01 +0000 Subject: [PATCH 114/158] fix cuda 12 --- .../cagra/search_multi_cta_kernel-inl.cuh | 4 +-- .../detail/cagra/search_multi_kernel.cuh | 30 ++----------------- .../detail/cagra/set_value_batch.cuh | 4 +-- 3 files changed, 6 insertions(+), 32 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 7d643b29f5..81a2df2a77 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -16,6 +16,8 @@ #ifdef CUVS_ENABLE_JIT_LTO #include "search_multi_cta_kernel_launcher_jit.cuh" +#else +#include "set_value_batch.cuh" #endif #include @@ -516,8 +518,6 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel( #endif } -#include "set_value_batch.cuh" - template struct search_kernel_config { // Search kernel function type. 
Note that the actual values for the template value diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index 025827c112..d70dfb5fae 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -11,6 +11,8 @@ #include #endif +#include "set_value_batch.cuh" + #include "compute_distance-ext.cuh" #include "device_common.hpp" #include "hashmap.hpp" @@ -614,34 +616,6 @@ void batched_memcpy(T* const dst, // [batch_size, ld_dst] <<>>(dst, ld_dst, src, ld_src, count, batch_size); } -template -RAFT_KERNEL set_value_batch_kernel(T* const dev_ptr, - const std::size_t ld, - const T val, - const std::size_t count, - const std::size_t batch_size) -{ - const auto tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= count * batch_size) { return; } - const auto batch_id = tid / count; - const auto elem_id = tid % count; - dev_ptr[elem_id + ld * batch_id] = val; -} - -template -void set_value_batch(T* const dev_ptr, - const std::size_t ld, - const T val, - const std::size_t count, - const std::size_t batch_size, - cudaStream_t cuda_stream) -{ - constexpr std::uint32_t block_size = 256; - const auto grid_size = (count * batch_size + block_size - 1) / block_size; - set_value_batch_kernel - <<>>(dev_ptr, ld, val, count, batch_size); -} - // result_buffer (work buffer) for "multi-kernel" // +--------------------+------------------------------+-------------------+ // | internal_top_k (A) | neighbors of internal_top_k | internal_topk (B) | diff --git a/cpp/src/neighbors/detail/cagra/set_value_batch.cuh b/cpp/src/neighbors/detail/cagra/set_value_batch.cuh index c778429ac8..a4433005a7 100644 --- a/cpp/src/neighbors/detail/cagra/set_value_batch.cuh +++ b/cpp/src/neighbors/detail/cagra/set_value_batch.cuh @@ -7,7 +7,7 @@ #include #include -namespace cuvs::neighbors::cagra::detail::multi_cta_search { +namespace cuvs::neighbors::cagra::detail { 
template __global__ void set_value_batch_kernel(T* const dev_ptr, @@ -37,4 +37,4 @@ void set_value_batch(T* const dev_ptr, <<>>(dev_ptr, ld, val, count, batch_size); } -} // namespace cuvs::neighbors::cagra::detail::multi_cta_search +} // namespace cuvs::neighbors::cagra::detail From faa93397a13c95a632d98ba1f2e6276419149b54 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 19 Feb 2026 20:34:09 +0000 Subject: [PATCH 115/158] add includes --- cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index 99c1de13f8..faf78a8d5a 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -6,7 +6,7 @@ #include #include -#include +#include #include "cuda.h" #include @@ -68,9 +68,7 @@ void NVRTCLTOFragmentCompiler::compile(std::string const& key, std::string const NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &log_size)); std::unique_ptr log{new char[log_size]}; NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log.get())); - std::cerr << "nvrtrc compile error log: \n"; - std::cerr << log.get() << '\n'; - exit(1); + RAFT_FAIL("nvrtc compile error log: \n", log.get()); } // Obtain generated LTO IR from the program. 
From 73e8fa094cab350cbee72b725e186dd8434a7f46 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 19 Feb 2026 20:46:11 +0000 Subject: [PATCH 116/158] fix logging --- .../detail/jit_lto/NVRTCLTOFragmentCompiler.cu | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index faf78a8d5a..95e7a4805d 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -11,12 +11,13 @@ #include "cuda.h" #include -#define NVRTC_SAFE_CALL(_call) \ - { \ - nvrtcResult result = _call; \ - RAFT_EXPECTS(result == NVRTC_SUCCESS, \ - "nvrtc error: " + std::string(nvrtcGetErrorString(result))); \ - } +#define NVRTC_SAFE_CALL(_call) \ + { \ + nvrtcResult result = _call; \ + std::string error_string = \ + std::string("nvrtc error: ") + std::string(nvrtcGetErrorString(result)); +RAFT_EXPECTS(result == NVRTC_SUCCESS, error_string.c_str()); +} NVRTCLTOFragmentCompiler::NVRTCLTOFragmentCompiler() { @@ -68,7 +69,7 @@ void NVRTCLTOFragmentCompiler::compile(std::string const& key, std::string const NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &log_size)); std::unique_ptr log{new char[log_size]}; NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log.get())); - RAFT_FAIL("nvrtc compile error log: \n", log.get()); + RAFT_FAIL("nvrtc compile error log: \n%s", log.get()); } // Obtain generated LTO IR from the program. 
From fef68d3a6ac0a05f2a473e5e4ef5c8b8d1540cc1 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 19 Feb 2026 21:16:19 +0000 Subject: [PATCH 117/158] fix macro --- cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index 95e7a4805d..aafd1c868e 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -11,13 +11,13 @@ #include "cuda.h" #include -#define NVRTC_SAFE_CALL(_call) \ - { \ - nvrtcResult result = _call; \ - std::string error_string = \ - std::string("nvrtc error: ") + std::string(nvrtcGetErrorString(result)); -RAFT_EXPECTS(result == NVRTC_SUCCESS, error_string.c_str()); -} +#define NVRTC_SAFE_CALL(_call) \ + { \ + nvrtcResult result = _call; \ + std::string error_string = \ + std::string("nvrtc error: ") + std::string(nvrtcGetErrorString(result)); \ + RAFT_EXPECTS(result == NVRTC_SUCCESS, error_string.c_str()); \ + } NVRTCLTOFragmentCompiler::NVRTCLTOFragmentCompiler() { From 05cc1499ddbafdc593164d5bcc1003931f61352a Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 20 Feb 2026 00:06:09 +0000 Subject: [PATCH 118/158] major refactor to reduce # of fragments --- .../modules/generate_jit_lto_kernels.cmake | 249 ------------------ .../detail/cagra/compute_distance.hpp | 2 + ...mpute_distance_to_child_nodes_kernel.cu.in | 4 +- .../descriptor_accessors_standard_impl.cuh | 110 -------- .../descriptor_accessors_vpq_impl.cuh | 151 ----------- .../jit_lto_kernels/device_common_jit.cuh | 48 +--- .../extern_device_functions.cuh | 76 +----- .../jit_lto_kernels/get_args_standard.cu.in | 39 --- .../cagra/jit_lto_kernels/get_args_vpq.cu.in | 39 --- .../jit_lto_kernels/get_dim_standard.cu.in | 38 --- .../cagra/jit_lto_kernels/get_dim_vpq.cu.in | 38 --- .../jit_lto_kernels/get_size_standard.cu.in | 38 --- 
.../cagra/jit_lto_kernels/get_size_vpq.cu.in | 38 --- .../get_smem_ws_size_in_bytes_standard.cu.in | 38 --- .../get_smem_ws_size_in_bytes_vpq.cu.in | 38 --- ...eam_size_bitshift_from_smem_standard.cu.in | 38 --- ...get_team_size_bitshift_from_smem_vpq.cu.in | 38 --- .../get_team_size_bitshift_standard.cu.in | 38 --- .../get_team_size_bitshift_vpq.cu.in | 38 --- .../random_pickup_kernel.cu.in | 4 +- .../search_multi_cta_kernel.cu.in | 4 +- .../search_multi_cta_kernel_jit.cuh | 44 ++-- .../search_multi_cta_planner.hpp | 74 ------ .../search_multi_kernel_jit.cuh | 82 +++--- .../search_multi_kernel_planner.hpp | 81 ------ .../search_single_cta_kernel.cu.in | 4 +- .../search_single_cta_kernel_jit.cuh | 70 ++--- .../search_single_cta_kernel_p.cu.in | 4 +- .../search_single_cta_planner.hpp | 81 ------ .../jit_lto_kernels/setup_workspace_impl.cuh | 11 +- .../setup_workspace_standard.cu.in | 4 +- .../setup_workspace_standard_impl_unified.cuh | 18 +- .../jit_lto_kernels/setup_workspace_vpq.cu.in | 4 +- .../setup_workspace_vpq_impl_unified.cuh | 18 +- .../search_multi_cta_kernel_launcher_jit.cuh | 109 +------- .../detail/cagra/search_multi_kernel.cuh | 39 +-- .../search_multi_kernel_launcher_jit.cuh | 146 +--------- .../search_single_cta_kernel_launcher_jit.cuh | 93 +------ .../detail/cagra/shared_launcher_jit.hpp | 18 +- 39 files changed, 150 insertions(+), 1856 deletions(-) delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_standard_impl.cuh delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_vpq_impl.cuh delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_standard.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in delete mode 100644 
cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 46c47ed683..3ba140e337 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -307,255 +307,6 @@ function(generate_jit_lto_kernels target) endforeach() endforeach() - # Generate descriptor accessor fragments for standard descriptors These fragments provide get_dim, - # get_size, get_team_size_bitshift, get_args, get_smem_ws_size_in_bytes - foreach(data_idx IN ITEMS 0 1 2 3) - list(GET cagra_data_types ${data_idx} data_type) - list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) - foreach(team_size IN LISTS cagra_team_sizes) - foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - # get_dim_standard - set(kernel_name - "get_dim_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - set(data_type "${data_type}") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - 
set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_size_standard - set(kernel_name - "get_size_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_team_size_bitshift_standard - set(kernel_name - "get_team_size_bitshift_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_team_size_bitshift_from_smem_standard - set(kernel_name - 
"get_team_size_bitshift_from_smem_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_args_standard - set(kernel_name - "get_args_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_args_standard.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_smem_ws_size_in_bytes_standard - set(kernel_name - "get_smem_ws_size_in_bytes_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - 
EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - endforeach() - endforeach() - endforeach() - - # Generate descriptor accessor fragments for VPQ descriptors - foreach(data_idx IN ITEMS 0 1 2 3) - list(GET cagra_data_types ${data_idx} data_type) - list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) - foreach(team_size IN LISTS cagra_team_sizes) - foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - foreach(pq_len IN LISTS cagra_pq_lens) - # get_dim_vpq - set(kernel_name - "get_dim_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - set(pq_bits "${cagra_pq_bits}") - set(codebook_type "${cagra_codebook_type}") - set(data_type "${data_type}") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_size_vpq - set(kernel_name - "get_size_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER 
"${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_team_size_bitshift_vpq - set(kernel_name - "get_team_size_bitshift_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_team_size_bitshift_from_smem_vpq - set(kernel_name - "get_team_size_bitshift_from_smem_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_args_vpq - set(kernel_name - "get_args_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in" - "${filename}" - 
@ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - - # get_smem_ws_size_in_bytes_vpq - set(kernel_name - "get_smem_ws_size_in_bytes_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - endforeach() - endforeach() - endforeach() - endforeach() - # Generate dist_op fragments for each metric Note: InnerProduct and CosineExpanded both use # inner_product dist_op, so we only generate it once foreach(data_idx IN ITEMS 0 1 2 3) diff --git a/cpp/src/neighbors/detail/cagra/compute_distance.hpp b/cpp/src/neighbors/detail/cagra/compute_distance.hpp index 1548a61761..2466795514 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/src/neighbors/detail/cagra/compute_distance.hpp @@ -192,6 +192,7 @@ struct alignas(device::LOAD_128BIT_T) dataset_descriptor_base_t { return smem_and_team_size.team_size(); } +#if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) RAFT_DEVICE_INLINE_FUNCTION auto setup_workspace(void* smem_ptr, const DATA_T* queries_ptr, uint32_t query_id) const -> const base_type* @@ -205,6 +206,7 @@ struct alignas(device::LOAD_128BIT_T) dataset_descriptor_base_t { auto per_thread_distances = valid ? 
compute_distance_impl(args.load(), dataset_index) : 0; return device::team_sum(per_thread_distances, team_size_bitshift_from_smem()); } +#endif }; /** diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index f8bea170a6..685d4cefdd 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -15,9 +15,9 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { // For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void // For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +// Note: Kernels use dataset_descriptor_base_t* pointer directly template __global__ void compute_distance_to_child_nodes_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@, cuvs::neighbors::filtering::none_sample_filter>( - const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, void*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); + const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const 
@data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_standard_impl.cuh deleted file mode 100644 index 7001c630ae..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_standard_impl.cuh +++ /dev/null @@ -1,110 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "../compute_distance_standard-impl.cuh" - -namespace cuvs::neighbors::cagra::detail { - -// Descriptor accessor fragments for standard descriptors -// These take void* and reconstruct the descriptor pointer, then return the member -// Same function names as VPQ versions - planner links the right fragment at runtime -// Uses unified template parameters (PQ_BITS=0, PQ_LEN=0, CodebookT=void for standard descriptors) - -template -__device__ uint32_t get_dim(void* desc_ptr) -{ - using desc_t = standard_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - return desc->args.dim; -} - -template -__device__ IndexT get_size(void* desc_ptr) -{ - using desc_t = standard_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - return desc->size; -} - -template -__device__ uint32_t get_team_size_bitshift(void* desc_ptr) -{ - using desc_t = standard_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - // Use team_size_bitshift() which works for both global and shared memory descriptors - // team_size_bitshift_from_smem() only works when descriptor is in shared memory - return desc->team_size_bitshift(); -} - -template -__device__ uint32_t get_team_size_bitshift_from_smem(void* 
desc_ptr) -{ - using desc_t = standard_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - // Use team_size_bitshift_from_smem() which is optimized for shared memory access - return desc->team_size_bitshift_from_smem(); -} - -template -__device__ typename dataset_descriptor_base_t::args_t get_args( - void* desc_ptr) -{ - using desc_t = standard_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - return desc->args.load(); -} - -template -__device__ uint32_t get_smem_ws_size_in_bytes(void* desc_ptr, uint32_t dim) -{ - using desc_t = standard_dataset_descriptor_t; - return desc_t::get_smem_ws_size_in_bytes(dim); -} - -} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_vpq_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_vpq_impl.cuh deleted file mode 100644 index f70341cb03..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/descriptor_accessors_vpq_impl.cuh +++ /dev/null @@ -1,151 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
- * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "../compute_distance_vpq-impl.cuh" - -namespace cuvs::neighbors::cagra::detail { - -// Descriptor accessor fragments for VPQ descriptors -// These take void* and reconstruct the descriptor pointer, then return the member -// Same function names as standard versions - planner links the right fragment at runtime - -template -__device__ uint32_t get_dim(void* desc_ptr) -{ - using desc_t = cagra_q_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - return desc->args.dim; -} - -template -__device__ IndexT get_size(void* desc_ptr) -{ - using desc_t = cagra_q_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - return desc->size; -} - -template -__device__ uint32_t get_team_size_bitshift(void* desc_ptr) -{ - using desc_t = cagra_q_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - // Use team_size_bitshift() which works for both global and shared memory descriptors - // team_size_bitshift_from_smem() only works when descriptor is in shared memory - return desc->team_size_bitshift(); -} - -template -__device__ uint32_t get_team_size_bitshift_from_smem(void* desc_ptr) -{ - using desc_t = cagra_q_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - // Use team_size_bitshift_from_smem() which is optimized for shared memory access - return desc->team_size_bitshift_from_smem(); -} - -template -__device__ typename dataset_descriptor_base_t::args_t get_args( - void* desc_ptr) -{ - using desc_t = cagra_q_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); - return desc->args.load(); -} - -template -__device__ uint32_t get_smem_ws_size_in_bytes(void* desc_ptr, uint32_t dim) -{ - using desc_t = cagra_q_dataset_descriptor_t; - return desc_t::get_smem_ws_size_in_bytes(dim); -} - -} // namespace cuvs::neighbors::cagra::detail diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh index 91be40c64f..dfe93d300f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh @@ -30,7 +30,7 @@ struct has_kpq_bits { template inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; -// JIT version of compute_distance_to_random_nodes - uses extern functions with void* descriptor +// JIT version of compute_distance_to_random_nodes - uses dataset_descriptor_base_t* pointer // Shared between single_cta and multi_cta JIT kernels // Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT template * smem_desc, const uint32_t num_pickup, const uint32_t num_distilation, const uint64_t rand_xor_mask, @@ -59,26 +59,14 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( { constexpr unsigned warp_size = 32; - // Get team_size_bits and args using accessor fragments - // Planner links the right fragment (standard or VPQ) at runtime based on descriptor type + // Get team_size_bits and args directly from base descriptor using args_t = typename cuvs::neighbors::cagra::detail:: dataset_descriptor_base_t::args_t; - // Use get_team_size_bitshift_from_smem since smem_desc is in shared memory - uint32_t team_size_bits = get_team_size_bitshift_from_smem(smem_desc); - args_t args = - get_args( - smem_desc); - IndexT dataset_size = - get_size( - smem_desc); + // Use team_size_bitshift_from_smem since smem_desc is in shared memory + uint32_t team_size_bits = smem_desc->team_size_bitshift_from_smem(); + args_t args = smem_desc->args.load(); + IndexT dataset_size = smem_desc->size; const auto max_i = raft::round_up_safe(num_pickup, warp_size >> team_size_bits); @@ -144,7 +132,7 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( } } -// JIT version of 
compute_distance_to_child_nodes - uses extern functions with void* descriptor +// JIT version of compute_distance_to_child_nodes - uses dataset_descriptor_base_t* pointer // Shared between single_cta and multi_cta JIT kernels // Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT template * smem_desc, const IndexT* __restrict__ knn_graph, const uint32_t knn_k, IndexT* __restrict__ visited_hashmap_ptr, @@ -205,23 +193,13 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( // Compute the distance to child nodes using unified extern compute_distance constexpr unsigned warp_size = 32; - // Get team_size_bits and args using accessor fragments - // Planner links the right fragment (standard or VPQ) at runtime based on descriptor type + // Get team_size_bits and args directly from base descriptor using args_t = typename cuvs::neighbors::cagra::detail:: dataset_descriptor_base_t::args_t; - // Use get_team_size_bitshift_from_smem since smem_desc is in shared memory - uint32_t team_size_bits = get_team_size_bitshift_from_smem(smem_desc); - args_t args = - get_args( - smem_desc); + // Use team_size_bitshift_from_smem since smem_desc is in shared memory + uint32_t team_size_bits = smem_desc->team_size_bitshift_from_smem(); + args_t args = smem_desc->args.load(); const auto num_k = knn_k * search_width; const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bits); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh index e57a5ebf06..737e466aac 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -38,73 +38,8 @@ namespace cuvs::neighbors::cagra::detail { // All extern function declarations are in the cuvs::neighbors::cagra::detail namespace // so they can be used by all search modes 
without being beholden to any specific sub-namespace -// Descriptor accessor extern functions (standard and VPQ versions use same function names) -// These take void* and reconstruct the descriptor inside -// Planner links the right fragment (standard or VPQ) at runtime based on descriptor type -// Uses unified template parameters: for standard descriptors, PQ_BITS=0, PQ_LEN=0, CodebookT=void -template -extern __device__ uint32_t get_dim(void* desc_ptr); - -template -extern __device__ IndexT get_size(void* desc_ptr); - -template -extern __device__ uint32_t get_team_size_bitshift(void* desc_ptr); - -template -extern __device__ uint32_t get_team_size_bitshift_from_smem(void* desc_ptr); - -template -extern __device__ typename dataset_descriptor_base_t::args_t get_args( - void* desc_ptr); - -template -extern __device__ uint32_t get_smem_ws_size_in_bytes(void* desc_ptr, uint32_t dim); - // Unified setup_workspace and compute_distance extern functions -// These take void* and reconstruct the descriptor inside +// These take dataset_descriptor_base_t* and reconstruct the derived descriptor inside // Standard and VPQ versions are in separate impl headers but use the same function name template -extern __device__ void* setup_workspace(void* desc_ptr, - void* smem, - const DataT* queries, - uint32_t query_id); +extern __device__ dataset_descriptor_base_t* setup_workspace( + dataset_descriptor_base_t* desc_ptr, + void* smem, + const DataT* queries, + uint32_t query_id); template - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_args function for standard descriptor -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ args_t get_args<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - 
-#include -#include -#include "get_args_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_args_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_args_standard_t@team_size@_dim@dataset_block_dim@", - embedded_get_args_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_args_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in deleted file mode 100644 index dbe6fef376..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_args_vpq.cu.in +++ /dev/null @@ -1,39 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_args function for VPQ descriptor -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ args_t get_args<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_args_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in deleted file mode 100644 index a27c7c78ac..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_standard.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_dim function for standard descriptor -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -template __device__ uint32_t get_dim<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_dim_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_dim_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_dim_standard_t@team_size@_dim@dataset_block_dim@", - embedded_get_dim_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_dim_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in deleted file mode 100644 index 61db38311f..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_dim_vpq.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_dim function for VPQ descriptor -template __device__ uint32_t get_dim<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_dim_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in deleted file mode 100644 index 7ab5f9f702..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_standard.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_size function for standard descriptor -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -template __device__ @index_type@ get_size<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_size_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_size_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_size_standard_t@team_size@_dim@dataset_block_dim@", - embedded_get_size_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_size_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in deleted file mode 100644 index 18c5610048..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_size_vpq.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_size function for VPQ descriptor -template __device__ @index_type@ get_size<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_size_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in deleted file mode 100644 index 7ae94bf363..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_standard.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_smem_ws_size_in_bytes function for standard descriptor -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -template __device__ uint32_t get_smem_ws_size_in_bytes<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*, uint32_t); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@", - embedded_get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_smem_ws_size_in_bytes_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in deleted file mode 100644 index b4332a0fc2..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_smem_ws_size_in_bytes_vpq.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_smem_ws_size_in_bytes function for VPQ descriptor -template __device__ uint32_t get_smem_ws_size_in_bytes<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*, uint32_t); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_smem_ws_size_in_bytes_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in deleted file mode 100644 index 21410aee3e..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_standard.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_team_size_bitshift_from_smem function for standard descriptor -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -template __device__ uint32_t get_team_size_bitshift_from_smem<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@", - embedded_get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_team_size_bitshift_from_smem_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in deleted file mode 100644 index 2ea5492275..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_from_smem_vpq.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_team_size_bitshift_from_smem function for VPQ descriptor -template __device__ uint32_t get_team_size_bitshift_from_smem<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_team_size_bitshift_from_smem_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in deleted file mode 100644 index ab288d05b7..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_standard.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_team_size_bitshift function for standard descriptor -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -template __device__ uint32_t get_team_size_bitshift<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@", - embedded_get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_team_size_bitshift_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in deleted file mode 100644 index cae812ca7d..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/get_team_size_bitshift_vpq.cu.in +++ /dev/null @@ -1,38 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the get_team_size_bitshift function for VPQ descriptor -template __device__ uint32_t get_team_size_bitshift<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>(void*); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_get_team_size_bitshift_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in index e02cfb08a9..c96bc04771 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -15,9 +15,9 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { // For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void // For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +// Note: 
Kernels use dataset_descriptor_base_t* pointer directly template __global__ void random_pickup_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( - void*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); + cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index 1359101627..0069b0a98f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -18,9 +18,9 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { // For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void // For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +// Note: Kernels use dataset_descriptor_base_t* pointer directly template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - @index_type@* const, @distance_type@* const, void*, const @data_type@* const, const 
@index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); + @index_type@* const, @distance_type@* const, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh index 46270421b7..1e79d54835 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh @@ -42,7 +42,7 @@ using cuvs::neighbors::detail::sample_filter; using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit; using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; -// JIT version of search_kernel - uses extern functions with void* descriptor pointer +// JIT version of search_kernel - uses dataset_descriptor_base_t* pointer // Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT // Filter is linked separately via JIT LTO, so we use none_sample_filter directly template * dataset_desc, + const 
DataT* const queries_ptr, // [num_queries, dataset_dim] + const IndexT* const knn_graph, // [dataset_size, graph_degree] const uint32_t max_elements, const uint32_t graph_degree, const SourceIndexT* source_indices_ptr, // [num_queries, search_width] @@ -119,30 +119,22 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( const auto result_buffer_size_32 = raft::round_up_safe(result_buffer_size, 32); assert(result_buffer_size_32 <= max_elements); - // Get dim and smem_ws_size_in_bytes using accessor fragments - // Planner links the right fragment (standard or VPQ) at runtime based on descriptor type - uint32_t dim = - get_dim( - dataset_desc); - uint32_t smem_ws_size_in_bytes = get_smem_ws_size_in_bytes(dataset_desc, dim); + // Get dim and smem_ws_size_in_bytes directly from base descriptor + uint32_t dim = dataset_desc->args.dim; + uint32_t smem_ws_size_in_bytes = dataset_desc->smem_ws_size_in_bytes(); // Set smem working buffer using unified setup_workspace - // Planner links the right fragment (standard or VPQ) at runtime based on descriptor type - void* smem_desc = setup_workspace(dataset_desc, smem, queries_ptr, query_id); + // setup_workspace copies the descriptor to shared memory and returns base pointer to smem + // descriptor + dataset_descriptor_base_t* smem_desc = + setup_workspace(dataset_desc, smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index c4d498827f..188a5e481d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -150,80 +150,6 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { this->device_functions.push_back("sample_filter_" + filter_name); } - void 
add_descriptor_accessor_device_functions(uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits = 0, - uint32_t pq_len = 0) - { - // Register all descriptor accessor fragments (get_dim, get_size, get_team_size_bitshift, - // get_args, get_smem_ws_size_in_bytes) - // These fragments allow kernels to access descriptor members via void* pointers - if (is_vpq) { - using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - auto params = make_fragment_key(); - std::string base = "get_dim_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_size_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - // Multi CTA kernels only use shared memory descriptors (after setup_workspace), - // so we only need get_team_size_bitshift_from_smem - base = "get_team_size_bitshift_from_smem_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_args_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_smem_ws_size_in_bytes_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - } else { - 
auto params = make_fragment_key(); - std::string base = "get_dim_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_size_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - // Multi CTA kernels only use shared memory descriptors (after setup_workspace), - // so we only need get_team_size_bitshift_from_smem - base = "get_team_size_bitshift_from_smem_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_args_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_smem_ws_size_in_bytes_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - } - } - private: std::string entrypoint_name_; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh index 3a4ccde06d..511612e0b8 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh @@ -36,7 +36,7 @@ struct has_kpq_bits { template inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; -// JIT version of random_pickup_kernel - uses extern functions with void* descriptor pointer +// JIT version of random_pickup_kernel - uses dataset_descriptor_base_t* pointer // Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT template RAFT_KERNEL random_pickup_kernel_jit( - void* dataset_desc, // void* 
descriptor pointer (reconstructed in fragments) + dataset_descriptor_base_t* dataset_desc, const DataT* const queries_ptr, // [num_queries, dataset_dim] const std::size_t num_pickup, const unsigned num_distilation, @@ -64,15 +64,8 @@ RAFT_KERNEL random_pickup_kernel_jit( using INDEX_T = IndexT; using DISTANCE_T = DistanceT; - // Get team_size_bits using accessor fragment (planner links the right fragment at runtime) - uint32_t team_size_bits = get_team_size_bitshift(dataset_desc); + // Get team_size_bits directly from base descriptor + uint32_t team_size_bits = dataset_desc->team_size_bitshift(); const auto ldb = hashmap::get_size(hash_bitlen); const auto global_team_index = (blockIdx.x * blockDim.x + threadIdx.x) >> team_size_bits; @@ -81,27 +74,24 @@ RAFT_KERNEL random_pickup_kernel_jit( extern __shared__ uint8_t smem[]; // Set smem working buffer using unified setup_workspace - // setup_workspace copies the descriptor to shared memory and returns void* to smem descriptor - // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - void* smem_desc = setup_workspace(dataset_desc, smem, queries_ptr, query_id); + // setup_workspace copies the descriptor to shared memory and returns base pointer to smem + // descriptor NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) + dataset_descriptor_base_t* smem_desc = + setup_workspace(dataset_desc, smem, queries_ptr, query_id); __syncthreads(); // Load args once for better performance (avoid repeated loads in the loop) using args_t = typename cuvs::neighbors::cagra::detail:: dataset_descriptor_base_t::args_t; - args_t args = - get_args( - smem_desc); - IndexT dataset_size = - get_size( - smem_desc); + args_t args = smem_desc->args.load(); + IndexT dataset_size = smem_desc->size; INDEX_T best_index_team_local; DISTANCE_T best_norm2_team_local = utils::get_max_value(); @@ -167,7 +157,7 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( DistanceT* const 
parent_distance_ptr, // [num_queries, search_width] const std::size_t lds, const std::uint32_t search_width, - void* dataset_desc, // void* descriptor pointer (reconstructed in fragments) + dataset_descriptor_base_t* dataset_desc, const IndexT* const neighbor_graph_ptr, // [dataset_size, graph_degree] const std::uint32_t graph_degree, const SourceIndexT* source_indices_ptr, @@ -182,15 +172,8 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( using INDEX_T = IndexT; using DISTANCE_T = DistanceT; - // Get team_size_bits using accessor fragment (planner links the right fragment at runtime) - uint32_t team_size_bits = get_team_size_bitshift(dataset_desc); + // Get team_size_bits directly from base descriptor + uint32_t team_size_bits = dataset_desc->team_size_bitshift(); const auto team_size = 1u << team_size_bits; const uint32_t ldb = hashmap::get_size(hash_bitlen); @@ -200,16 +183,17 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( extern __shared__ uint8_t smem[]; // Load a query using unified setup_workspace - // setup_workspace copies the descriptor to shared memory and returns void* to smem descriptor - // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - void* smem_desc = setup_workspace(dataset_desc, smem, query_ptr, query_id); + // setup_workspace copies the descriptor to shared memory and returns base pointer to smem + // descriptor NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) + dataset_descriptor_base_t* smem_desc = + setup_workspace(dataset_desc, smem, query_ptr, query_id); __syncthreads(); if (global_team_id >= search_width * graph_degree) { return; } @@ -238,9 +222,7 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( // Load args once for better performance (avoid repeated loads) using args_t = typename cuvs::neighbors::cagra::detail:: dataset_descriptor_base_t::args_t; - args_t args = - get_args( - smem_desc); + args_t args = smem_desc->args.load(); // CRITICAL: ALL 
threads in the team must participate in compute_distance and team_sum // Otherwise warp shuffles will hang. Each thread calls the unified extern function to get diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index c7641a9bd3..ba44a5d9cf 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -153,87 +153,6 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { this->device_functions.push_back("sample_filter_" + filter_name); } - void add_descriptor_accessor_device_functions(uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits = 0, - uint32_t pq_len = 0) - { - // Register all descriptor accessor fragments (get_dim, get_size, get_team_size_bitshift, - // get_args, get_smem_ws_size_in_bytes) - // These fragments allow kernels to access descriptor members via void* pointers - if (is_vpq) { - using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - auto params = make_fragment_key(); - std::string base = "get_dim_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_size_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_team_size_bitshift_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - 
- base = "get_team_size_bitshift_from_smem_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_args_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_smem_ws_size_in_bytes_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - } else { - auto params = make_fragment_key(); - std::string base = "get_dim_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_size_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_team_size_bitshift_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_team_size_bitshift_from_smem_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_args_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_smem_ws_size_in_bytes_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - 
this->device_functions.push_back(base); - } - } - void set_entrypoint_name(const std::string& name) { entrypoint_name_ = name; } private: diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in index 7afbb5400f..24fb9f0dab 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -15,9 +15,9 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { // For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void // For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +// Note: Kernels use dataset_descriptor_base_t* pointer directly template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, void*, uint32_t*, @source_index_type@, @source_index_type@); + uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const 
@index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh index f500ef8eeb..0162f24db3 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -84,7 +84,7 @@ using cuvs::neighbors::detail::sample_filter; using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit; using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; -// JIT version of search_core - uses extern functions with void* descriptor pointer +// JIT version of search_core - uses dataset_descriptor_base_t* pointer // Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT // For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void // For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half @@ -124,10 +124,10 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( const std::uint32_t small_hash_reset_interval, const std::uint32_t query_id, const std::uint32_t query_id_offset, // Offset to add to query_id when calling filter - void* dataset_desc, // void* descriptor pointer (reconstructed in fragments) - uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) - SourceIndexT bitset_len, // Bitset length - SourceIndexT original_nbits) // Original number of bits + 
dataset_descriptor_base_t* dataset_desc, + uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) + SourceIndexT bitset_len, // Bitset length + SourceIndexT original_nbits) // Original number of bits { using LOAD_T = device::LOAD_128BIT_T; @@ -159,32 +159,22 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( const auto result_buffer_size_32 = raft::round_up_safe(result_buffer_size, 32); const auto small_hash_size = hashmap::get_size(small_hash_bitlen); - // Get dim using accessor fragment (reconstructs descriptor from void*) - // Planner links the right fragment (standard or VPQ) at runtime based on descriptor type - uint32_t dim = - get_dim( - dataset_desc); - uint32_t smem_ws_size_in_bytes = get_smem_ws_size_in_bytes(dataset_desc, dim); + // Get dim and smem_ws_size directly from base descriptor + uint32_t dim = dataset_desc->args.dim; + uint32_t smem_ws_size_in_bytes = dataset_desc->smem_ws_size_in_bytes(); // Set smem working buffer using unified setup_workspace - // setup_workspace copies the descriptor to shared memory and returns void* to smem descriptor - // NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - // Planner links the right fragment (standard or VPQ) at runtime based on descriptor type - void* smem_desc = setup_workspace(dataset_desc, smem, queries_ptr, query_id); + // setup_workspace copies the descriptor to shared memory and returns base pointer to smem + // descriptor NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) + dataset_descriptor_base_t* smem_desc = + setup_workspace(dataset_desc, smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); @@ -220,10 +210,8 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( // compute distance to randomly selecting nodes using JIT version _CLK_START(); const IndexT* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; - // Get dataset_size using accessor fragment (planner links the right fragment at runtime) - IndexT dataset_size = - get_size( - smem_desc); + // Get dataset_size directly from base descriptor + IndexT dataset_size = smem_desc->size; compute_distance_to_random_nodes_jit* dataset_desc, + uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) + SourceIndexT bitset_len, // Bitset length + SourceIndexT original_nbits) // Original number of bits { const auto query_id = blockIdx.y; search_core* dataset_desc, + uint32_t* bitset_ptr, // Bitset data pointer (nullptr for none_filter) + SourceIndexT bitset_len, // Bitset length + SourceIndexT original_nbits) // Original number of bits { using job_desc_type = job_desc_t>; __shared__ typename job_desc_type::input_t job_descriptor; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in index e8b4565347..41eb6bd076 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in @@ -15,9 +15,9 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { // For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void // For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Descriptor type is no longer instantiated here - kernels use void* and accessor fragments +// Note: Kernels use dataset_descriptor_base_t* pointer directly template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( - worker_handle_t*, 
job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, void*, uint32_t*, @source_index_type@, @source_index_type@); + worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index bfd484a957..c482d2b593 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -51,13 +51,6 @@ struct CagraSearchPlanner : AlgorithmPlanner { cuvs::neighbors::cagra::detail::tag_codebook_half>() : make_fragment_key()) { - std::string kernel_type = persistent ? 
"persistent" : "regular"; - std::cerr << "[JIT] CagraSearchPlanner created for " << kernel_type - << " JIT kernel (topk_by_bitonic_sort=" << bool_to_string(topk_by_bitonic_sort) - << ", bitonic_sort_and_merge_multi_warps=" - << bool_to_string(bitonic_sort_and_merge_multi_warps) - << ", metric=" << metric_to_string(metric) << ")" << std::endl; - std::cerr.flush(); } void add_setup_workspace_device_function(cuvs::distance::DistanceType metric, @@ -168,80 +161,6 @@ struct CagraSearchPlanner : AlgorithmPlanner { this->device_functions.push_back("sample_filter_" + filter_name); } - void add_descriptor_accessor_device_functions(uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits = 0, - uint32_t pq_len = 0) - { - // Register all descriptor accessor fragments (get_dim, get_size, get_team_size_bitshift, - // get_args, get_smem_ws_size_in_bytes) - // These fragments allow kernels to access descriptor members via void* pointers - if (is_vpq) { - using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - auto params = make_fragment_key(); - std::string base = "get_dim_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_size_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - // Single CTA kernels only use shared memory descriptors (after setup_workspace), - // so we only need get_team_size_bitshift_from_smem - base = "get_team_size_bitshift_from_smem_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - 
this->device_functions.push_back(base); - - base = "get_args_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_smem_ws_size_in_bytes_vpq_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - base += "_" + params; - this->device_functions.push_back(base); - } else { - auto params = make_fragment_key(); - std::string base = "get_dim_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_size_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - // Single CTA kernels only use shared memory descriptors (after setup_workspace), - // so we only need get_team_size_bitshift_from_smem - base = "get_team_size_bitshift_from_smem_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_args_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - - base = "get_smem_ws_size_in_bytes_standard_t" + std::to_string(team_size); - base += "_dim" + std::to_string(dataset_block_dim); - base += "_" + params; - this->device_functions.push_back(base); - } - } - private: static std::string build_entrypoint_name(cuvs::distance::DistanceType metric, bool topk_by_bitonic_sort, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh index c40400ed95..a4ffccfb60 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh @@ -9,7 +9,7 @@ namespace cuvs::neighbors::cagra::detail { -// Unified setup_workspace function - takes void* and template parameters +// Unified setup_workspace function - takes dataset_descriptor_base_t* and template parameters // Standard and VPQ versions are in separate impl headers but use the same function name // The planner links the appropriate fragment at runtime based on PQ_BITS/PQ_LEN template -extern __device__ void* setup_workspace(void* desc_ptr, - void* smem, - const DataT* queries, - uint32_t query_id); +extern __device__ dataset_descriptor_base_t* setup_workspace( + dataset_descriptor_base_t* desc_ptr, + void* smem, + const DataT* queries, + uint32_t query_id); } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in index 204bd4e285..ffb86f7696 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in @@ -13,8 +13,8 @@ namespace cuvs::neighbors::cagra::detail { // Instantiate the unified setup_workspace function for standard descriptor // PQ_BITS=0, PQ_LEN=0, CodebookT=void for standard descriptors -template __device__ void* setup_workspace<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>( - void*, void*, const @data_type@*, uint32_t); +template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>( + 
cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh index cc43e12b50..539dbf1fdb 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh @@ -13,7 +13,7 @@ namespace cuvs::neighbors::cagra::detail { // Unified setup_workspace implementation for standard descriptors // This is instantiated when PQ_BITS=0, PQ_LEN=0, CodebookT=void -// Takes void* and reconstructs the descriptor inside +// Takes dataset_descriptor_base_t* and reconstructs the derived descriptor inside template -__device__ void* setup_workspace(void* desc_ptr, - void* smem, - const DataT* queries, - uint32_t query_id) +__device__ dataset_descriptor_base_t* setup_workspace( + dataset_descriptor_base_t* desc_ptr, + void* smem, + const DataT* queries, + uint32_t query_id) { // For standard descriptors, PQ_BITS=0, PQ_LEN=0, CodebookT=void static_assert(PQ_BITS == 0 && PQ_LEN == 0 && std::is_same_v, "Standard descriptor requires PQ_BITS=0, PQ_LEN=0, CodebookT=void"); - // Reconstruct the descriptor pointer from void* + // Reconstruct the descriptor pointer from base pointer using desc_t = standard_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); + const desc_t* desc = static_cast(desc_ptr); // Call the free function directly - it takes DescriptorT as template parameter const desc_t* result = setup_workspace_standard(desc, smem, queries, query_id); - return const_cast(result); + return const_cast*>( + static_cast*>(result)); } } // namespace cuvs::neighbors::cagra::detail diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in index 394a86992c..694697c075 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in @@ -13,8 +13,8 @@ namespace cuvs::neighbors::cagra::detail { // Instantiate the unified setup_workspace function for VPQ descriptor // PQ_BITS>0, PQ_LEN>0, CodebookT=half for VPQ descriptors -template __device__ void* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( - void*, void*, const @data_type@*, uint32_t); +template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( + cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl_unified.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl_unified.cuh index 40b802f468..ac388ad341 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl_unified.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl_unified.cuh @@ -13,7 +13,7 @@ namespace cuvs::neighbors::cagra::detail { // Unified setup_workspace implementation for VPQ descriptors // This is instantiated when PQ_BITS>0, PQ_LEN>0, CodebookT=half -// Takes void* and reconstructs the descriptor inside +// Takes dataset_descriptor_base_t* and reconstructs the derived descriptor inside template -__device__ void* setup_workspace(void* desc_ptr, - void* smem, - const 
DataT* queries, - uint32_t query_id) +__device__ dataset_descriptor_base_t* setup_workspace( + dataset_descriptor_base_t* desc_ptr, + void* smem, + const DataT* queries, + uint32_t query_id) { // For VPQ descriptors, PQ_BITS>0, PQ_LEN>0, CodebookT=half static_assert(PQ_BITS > 0 && PQ_LEN > 0 && std::is_same_v, "VPQ descriptor requires PQ_BITS>0, PQ_LEN>0, CodebookT=half"); - // Reconstruct the descriptor pointer from void* + // Reconstruct the descriptor pointer from base pointer using desc_t = cagra_q_dataset_descriptor_t; - const desc_t* desc = reinterpret_cast(desc_ptr); + const desc_t* desc = static_cast(desc_ptr); // Call the free function directly - it takes DescriptorT as template parameter const desc_t* result = setup_workspace_vpq(desc, smem, queries, query_id); - return const_cast(result); + return const_cast*>( + static_cast*>(result)); } } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 8b96f3834d..123308c581 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -60,14 +60,6 @@ void select_and_run_jit( SampleFilterT sample_filter, cudaStream_t stream) { - RAFT_LOG_INFO( - "[JIT LAUNCHER] Entering MULTI_CTA launcher (num_queries=%u, topk=%u, num_cta_per_query=%u, " - "result_buffer_size=%u, itopk_size=%zu)", - num_queries, - topk, - num_cta_per_query, - result_buffer_size, - ps.itopk_size); // Extract bitset data from filter object (if it's a bitset_filter) uint32_t* bitset_ptr = nullptr; SourceIndexT bitset_len = 0; @@ -82,22 +74,13 @@ void select_and_run_jit( using InnerFilter = decltype(sample_filter.filter); // Always extract offset for wrapped filters query_id_offset = sample_filter.offset; - RAFT_LOG_INFO("Extracted query_id_offset: %u", query_id_offset); if constexpr 
(is_bitset_filter::value) { // Extract bitset data for bitset_filter (works for any bitset_filter instantiation) auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); - RAFT_LOG_INFO("Extracted bitset data: bitset_ptr=%p, bitset_len=%zu, original_nbits=%zu", - bitset_ptr, - static_cast(bitset_len), - static_cast(original_nbits)); - } else { - RAFT_LOG_INFO("InnerFilter is not bitset_filter, skipping bitset extraction"); } - } else { - RAFT_LOG_INFO("Filter does not have wrapper members (.filter/.offset), skipping extraction"); } // Create planner with tags @@ -130,12 +113,6 @@ void select_and_run_jit( dataset_desc.pq_bits, dataset_desc.pq_len); - // Register descriptor accessor fragments first (needed for void* descriptor access) - planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -150,7 +127,6 @@ void select_and_run_jit( dataset_desc.pq_len); std::string filter_name = get_sample_filter_name(); planner.add_sample_filter_device_function(filter_name); - RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_CTA filter name: %s", filter_name.c_str()); // Get launcher using the planner's entrypoint name and fragment key auto params = make_fragment_key(); @@ -158,13 +134,6 @@ void select_and_run_jit( if (!launcher) { RAFT_FAIL("Failed to get JIT launcher"); } - RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_CTA launcher obtained (kernel handle: %p)", - launcher->get_kernel()); - - // Verify kernel handle is valid - cudaKernel_t kernel_handle = launcher->get_kernel(); - if (kernel_handle == nullptr) { RAFT_FAIL("JIT launcher has null kernel handle"); } - uint32_t max_elements{}; if (result_buffer_size <= 
64) { max_elements = 64; @@ -190,17 +159,11 @@ void select_and_run_jit( dim3 block_dims(block_size, 1, 1); dim3 grid_dims(num_cta_per_query, num_queries, 1); - RAFT_LOG_DEBUG("Launching JIT multi_cta kernel with %u threads, (%u, %u) blocks %u smem", - block_size, - num_cta_per_query, - num_queries, - smem_size); // Get the device descriptor pointer const dataset_descriptor_base_t* dev_desc_base = dataset_desc.dev_ptr(stream); const auto* dev_desc = dev_desc_base; - if (dev_desc == nullptr) { RAFT_FAIL("Device descriptor pointer is NULL"); } // Note: dataset_desc is passed by const reference, so it stays alive for the duration of this // function The descriptor's state is managed by a shared_ptr internally, so no need to explicitly @@ -220,41 +183,6 @@ void select_and_run_jit( const uint32_t max_iterations_u32 = static_cast(ps.max_iterations); const unsigned num_random_samplings_u = static_cast(ps.num_random_samplings); - RAFT_LOG_INFO( - "[JIT LAUNCHER] MULTI_CTA dispatch parameters: graph_degree=%u, traversed_hash_bitlen=%u, " - "itopk_size=%u, bitset_len=%u, original_nbits=%u, query_id_offset=%u", - graph_degree_u32, - traversed_hash_bitlen_u32, - itopk_size_u32, - static_cast(bitset_len), - static_cast(original_nbits), - query_id_offset); - - // Validate critical pointers before dispatch - if (topk_indices_ptr == nullptr) { RAFT_FAIL("MULTI_CTA: topk_indices_ptr is NULL"); } - if (topk_distances_ptr == nullptr) { RAFT_FAIL("MULTI_CTA: topk_distances_ptr is NULL"); } - if (graph.data_handle() == nullptr) { RAFT_FAIL("MULTI_CTA: graph.data_handle() is NULL"); } - if (dev_desc == nullptr) { RAFT_FAIL("MULTI_CTA: dev_desc is NULL"); } - RAFT_LOG_INFO( - "[JIT LAUNCHER] MULTI_CTA pointer validation passed: topk_indices=%p, topk_distances=%p, " - "graph=%p, dev_desc=%p", - topk_indices_ptr, - topk_distances_ptr, - graph.data_handle(), - dev_desc); - - // Log all critical parameters before dispatch to help diagnose issues - RAFT_LOG_INFO( - "[JIT LAUNCHER] MULTI_CTA 
pre-dispatch: num_queries=%u, topk=%u, num_cta_per_query=%u, " - "max_elements=%u, graph.extent(0)=%zu, graph.extent(1)=%zu", - num_queries, - topk, - num_cta_per_query, - max_elements, - graph.extent(0), - graph.extent(1)); - - RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_CTA launching kernel on stream=%p", stream); launcher->dispatch(stream, grid_dims, block_dims, @@ -283,42 +211,7 @@ void select_and_run_jit( bitset_len, original_nbits); - // Check for launch errors immediately - cudaError_t launch_err = cudaPeekAtLastError(); - if (launch_err != cudaSuccess) { - RAFT_LOG_ERROR("[JIT LAUNCHER] MULTI_CTA kernel launch error detected: %s (error code: %d)", - cudaGetErrorString(launch_err), - launch_err); - RAFT_CUDA_TRY(launch_err); - } - - // Synchronize to catch kernel execution errors before they propagate - // This ensures the kernel completes before we return, preventing parameter lifetime issues - cudaError_t sync_err = cudaStreamSynchronize(stream); - if (sync_err != cudaSuccess) { - RAFT_LOG_ERROR("[JIT LAUNCHER] MULTI_CTA kernel execution failed: %s (error code: %d)", - cudaGetErrorString(sync_err), - sync_err); - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_CTA parameters: graph_degree=%u, itopk_size=%u, num_queries=%u, " - "topk=%u, num_cta_per_query=%u, max_elements=%u", - graph_degree_u32, - itopk_size_u32, - num_queries, - topk, - num_cta_per_query, - max_elements); - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_CTA pointers: topk_indices=%p, topk_distances=%p, graph=%p, " - "dev_desc=%p", - topk_indices_ptr, - topk_distances_ptr, - graph.data_handle(), - dev_desc); - RAFT_CUDA_TRY(sync_err); - } - - RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_CTA kernel completed successfully"); + RAFT_CUDA_TRY(cudaPeekAtLastError()); } } // namespace cuvs::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index d70dfb5fae..0fa02b47fa 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -93,7 +93,6 @@ void get_value(T* const host_ptr, const T* const dev_ptr, cudaStream_t cuda_stre template auto get_value(const T* const dev_ptr, cudaStream_t stream) -> T { - if (dev_ptr == nullptr) { RAFT_FAIL("get_value: dev_ptr is NULL"); } T value; RAFT_CUDA_TRY(cudaMemcpyAsync(&value, dev_ptr, sizeof(value), cudaMemcpyDefault, stream)); RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); @@ -730,9 +729,6 @@ struct search RAFT_LOG_DEBUG("# topk_workspace_size: %lu", topk_workspace_size); topk_workspace.resize(topk_workspace_size, raft::resource::get_cuda_stream(res)); terminate_flag.resize(1, raft::resource::get_cuda_stream(res)); - if (terminate_flag.data() == nullptr) { - RAFT_FAIL("Failed to allocate terminate_flag: resize returned nullptr"); - } hashmap.resize(hashmap_size, raft::resource::get_cuda_stream(res)); } @@ -862,14 +858,6 @@ struct search } // Choose initial entry point candidates at random - // Validate all pointers before kernel launch to prevent illegal memory access - if (result_indices.data() == nullptr) { - RAFT_FAIL("result_indices.data() is NULL before random_pickup"); - } - if (result_distances.data() == nullptr) { - RAFT_FAIL("result_distances.data() is NULL before random_pickup"); - } - if (hashmap.data() == nullptr) { RAFT_FAIL("hashmap.data() is NULL before random_pickup"); } random_pickup(dataset_desc, queries_ptr, num_queries, @@ -910,34 +898,12 @@ struct search break; } - if (iter + 1 >= min_iterations) { - if (terminate_flag.data() == nullptr) { - RAFT_FAIL("terminate_flag.data() is NULL before set_value at iteration %u", iter + 1); - } - set_value(terminate_flag.data(), 1, stream); - } + if (iter + 1 >= min_iterations) { set_value(terminate_flag.data(), 1, stream); } // pickup parent nodes uint32_t _small_hash_bitlen = 0; if ((iter + 1) % small_hash_reset_interval == 0) { _small_hash_bitlen = small_hash_bitlen; } - // 
Validate all pointers before passing to kernel to prevent memory corruption - if (terminate_flag.data() == nullptr) { - RAFT_FAIL("terminate_flag.data() is NULL before pickup_next_parents at iteration %u", - iter + 1); - } - if (result_indices.data() == nullptr) { - RAFT_FAIL("result_indices.data() is NULL before pickup_next_parents at iteration %u", - iter + 1); - } - if (hashmap.data() == nullptr) { - RAFT_FAIL("hashmap.data() is NULL before pickup_next_parents at iteration %u", iter + 1); - } - if (parent_node_list.data() == nullptr) { - RAFT_FAIL("parent_node_list.data() is NULL before pickup_next_parents at iteration %u", - iter + 1); - } - pickup_next_parents(result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, result_buffer_allocation_size, itopk_size, @@ -953,9 +919,6 @@ struct search // termination (2) if (iter + 1 >= min_iterations) { - if (terminate_flag.data() == nullptr) { - RAFT_FAIL("terminate_flag.data() is NULL at iteration %u", iter + 1); - } if (get_value(terminate_flag.data(), stream)) { iter++; break; diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh index c5d36a6bc9..db0fdf940f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -49,11 +49,6 @@ void random_pickup_jit(const dataset_descriptor_host& std::uint32_t hash_bitlen, cudaStream_t cuda_stream) { - RAFT_LOG_INFO( - "[JIT LAUNCHER] Entering MULTI_KERNEL launcher (random_pickup: num_queries=%zu, " - "num_pickup=%zu)", - num_queries, - num_pickup); // Create planner with tags using DataTag = decltype(get_data_type_tag()); using IndexTag = decltype(get_index_type_tag()); @@ -69,12 +64,6 @@ void random_pickup_jit(const dataset_descriptor_host& dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); - // Register descriptor accessor fragments first (needed for void* 
descriptor access) - planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -96,31 +85,12 @@ void random_pickup_jit(const dataset_descriptor_host& // Get the device descriptor pointer const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); - if (dev_desc == nullptr) { - RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) dev_desc is NULL"); - } - - // Validate all pointers before kernel launch to prevent illegal memory access - if (queries_ptr == nullptr) { - RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) queries_ptr is NULL"); - } - if (result_indices_ptr == nullptr) { - RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) result_indices_ptr is NULL"); - } - if (result_distances_ptr == nullptr) { - RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) result_distances_ptr is NULL"); - } - if (visited_hashmap_ptr == nullptr) { - RAFT_FAIL("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) visited_hashmap_ptr is NULL"); - } // Cast size_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly const uint32_t ldr_u32 = static_cast(ldr); // Dispatch kernel via launcher - RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) launching kernel on stream=%p", - cuda_stream); launcher->dispatch(cuda_stream, grid_size, dim3(block_size, 1, 1), @@ -138,34 +108,7 @@ void random_pickup_jit(const dataset_descriptor_host& visited_hashmap_ptr, hash_bitlen); - // Check for launch errors immediately - cudaError_t launch_err = cudaPeekAtLastError(); - if (launch_err != cudaSuccess) { - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_KERNEL (random_pickup) kernel launch error detected: %s (error code: " - "%d)", - cudaGetErrorString(launch_err), - launch_err); - 
RAFT_CUDA_TRY(launch_err); - } - - // Synchronize to catch kernel execution errors before they propagate - cudaError_t sync_err = cudaStreamSynchronize(cuda_stream); - if (sync_err != cudaSuccess) { - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_KERNEL (random_pickup) kernel execution failed: %s (error code: %d)", - cudaGetErrorString(sync_err), - sync_err); - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_KERNEL (random_pickup) parameters: num_queries=%zu, num_pickup=%zu, " - "ldr=%u", - num_queries, - num_pickup, - ldr_u32); - RAFT_CUDA_TRY(sync_err); - } - - RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_KERNEL (random_pickup) kernel completed successfully"); + RAFT_CUDA_TRY(cudaPeekAtLastError()); } // JIT version of compute_distance_to_child_nodes @@ -194,11 +137,6 @@ void compute_distance_to_child_nodes_jit( SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream) { - RAFT_LOG_INFO( - "[JIT LAUNCHER] Entering MULTI_KERNEL launcher (compute_distance_to_child_nodes: " - "num_queries=%u, search_width=%u)", - num_queries, - search_width); // Create planner with tags using DataTag = decltype(get_data_type_tag()); using IndexTag = decltype(get_index_type_tag()); @@ -214,12 +152,6 @@ void compute_distance_to_child_nodes_jit( dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); - // Register descriptor accessor fragments first (needed for void* descriptor access) - planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -265,36 +197,7 @@ void compute_distance_to_child_nodes_jit( ldd, sample_filter); - // Check for launch errors immediately - cudaError_t launch_err = cudaPeekAtLastError(); - if (launch_err != cudaSuccess) { - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_KERNEL (compute_distance_to_child_nodes) kernel launch error detected: " 
- "%s (error code: %d)", - cudaGetErrorString(launch_err), - launch_err); - RAFT_CUDA_TRY(launch_err); - } - - // Synchronize to catch kernel execution errors before they propagate - cudaError_t sync_err = cudaStreamSynchronize(cuda_stream); - if (sync_err != cudaSuccess) { - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_KERNEL (compute_distance_to_child_nodes) kernel execution failed: %s " - "(error code: %d)", - cudaGetErrorString(sync_err), - sync_err); - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_KERNEL (compute_distance_to_child_nodes) parameters: num_queries=%u, " - "search_width=%u, graph_degree=%u", - num_queries, - search_width, - graph_degree); - RAFT_CUDA_TRY(sync_err); - } - - RAFT_LOG_INFO( - "[JIT LAUNCHER] MULTI_KERNEL (compute_distance_to_child_nodes) kernel completed successfully"); + RAFT_CUDA_TRY(cudaPeekAtLastError()); } // JIT version of apply_filter @@ -309,11 +212,6 @@ void apply_filter_jit(const SourceIndexT* source_indices_ptr, SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream) { - RAFT_LOG_INFO( - "[JIT LAUNCHER] Entering MULTI_KERNEL launcher (apply_filter: num_queries=%u, " - "result_buffer_size=%u)", - num_queries, - result_buffer_size); // Extract bitset data from filter object (if it's a bitset_filter) uint32_t* bitset_ptr = nullptr; SourceIndexT bitset_len = 0; @@ -326,22 +224,13 @@ void apply_filter_jit(const SourceIndexT* source_indices_ptr, sample_filter.offset; }) { using InnerFilter = decltype(sample_filter.filter); - RAFT_LOG_INFO("Filter has wrapper members, query_id_offset parameter: %u", query_id_offset); if constexpr (is_bitset_filter::value) { // Extract bitset data for bitset_filter (works for any bitset_filter instantiation) auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); - RAFT_LOG_INFO("Extracted bitset data: bitset_ptr=%p, bitset_len=%zu, original_nbits=%zu", - 
bitset_ptr, - static_cast(bitset_len), - static_cast(original_nbits)); - } else { - RAFT_LOG_INFO("InnerFilter is not bitset_filter, skipping bitset extraction"); } - } else { - RAFT_LOG_INFO("Filter does not have wrapper members (.filter/.offset), skipping extraction"); } // Create planner with tags @@ -386,36 +275,7 @@ void apply_filter_jit(const SourceIndexT* source_indices_ptr, bitset_len, original_nbits); - // Check for launch errors immediately - cudaError_t launch_err = cudaPeekAtLastError(); - if (launch_err != cudaSuccess) { - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_KERNEL (apply_filter) kernel launch error detected: %s (error code: " - "%d)", - cudaGetErrorString(launch_err), - launch_err); - RAFT_CUDA_TRY(launch_err); - } - - // Synchronize to catch kernel execution errors before they propagate - cudaError_t sync_err = cudaStreamSynchronize(cuda_stream); - if (sync_err != cudaSuccess) { - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_KERNEL (apply_filter) kernel execution failed: %s (error code: %d)", - cudaGetErrorString(sync_err), - sync_err); - RAFT_LOG_ERROR( - "[JIT LAUNCHER] MULTI_KERNEL (apply_filter) parameters: num_queries=%u, " - "result_buffer_size=%u, bitset_len=%u, original_nbits=%u, query_id_offset=%u", - num_queries, - result_buffer_size, - static_cast(bitset_len), - static_cast(original_nbits), - query_id_offset); - RAFT_CUDA_TRY(sync_err); - } - - RAFT_LOG_INFO("[JIT LAUNCHER] MULTI_KERNEL (apply_filter) kernel completed successfully"); + RAFT_CUDA_TRY(cudaPeekAtLastError()); } } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 34a9f11807..ebc7d5dc1b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -383,22 +383,13 @@ struct 
alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn using InnerFilter = decltype(sample_filter.filter); // Always extract offset for wrapped filters query_id_offset = sample_filter.offset; - RAFT_LOG_INFO("Extracted query_id_offset: %u", query_id_offset); if constexpr (is_bitset_filter::value) { // Extract bitset data for bitset_filter (works for any bitset_filter instantiation) auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); - RAFT_LOG_INFO("Extracted bitset data: bitset_ptr=%p, bitset_len=%zu, original_nbits=%zu", - bitset_ptr, - static_cast(bitset_len), - static_cast(original_nbits)); - } else { - RAFT_LOG_INFO("InnerFilter is not bitset_filter, skipping bitset extraction"); } - } else { - RAFT_LOG_INFO("Filter does not have wrapper members (.filter/.offset), skipping extraction"); } // set kernel launch parameters @@ -487,12 +478,6 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn bitset_len, original_nbits); - RAFT_LOG_INFO( - "Initialized the JIT persistent kernel in stream %zd; job_queue size = %u; worker_queue size " - "= %u", - int64_t((cudaStream_t)stream), - job_queue.capacity(), - worker_queue.capacity()); last_touch.store(std::chrono::system_clock::now(), std::memory_order_relaxed); } @@ -503,7 +488,6 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn whs[worker_queue.pop().wait()].data.store({kNoMoreWork}, cuda::memory_order_relaxed); } RAFT_CUDA_TRY_NO_THROW(cudaStreamSynchronize(stream)); - RAFT_LOG_INFO("Destroyed the JIT persistent runner."); } void launch(uintptr_t result_indices_ptr, @@ -590,11 +574,6 @@ void select_and_run_jit( SampleFilterT sample_filter, cudaStream_t stream) { - RAFT_LOG_INFO( - "[JIT LAUNCHER] Entering SINGLE_CTA launcher (persistent=%d, num_queries=%u, topk=%u)", - ps.persistent ? 
1 : 0, - num_queries, - topk); const SourceIndexT* source_indices_ptr = source_indices.has_value() ? source_indices->data_handle() : nullptr; @@ -613,26 +592,13 @@ void select_and_run_jit( using InnerFilter = decltype(sample_filter.filter); // Always extract offset for wrapped filters query_id_offset = sample_filter.offset; - RAFT_LOG_INFO("Extracted query_id_offset: %u", query_id_offset); if constexpr (is_bitset_filter::value) { // Extract bitset data for bitset_filter (works for any bitset_filter instantiation) auto bitset_view = sample_filter.filter.view(); bitset_ptr = const_cast(bitset_view.data()); bitset_len = static_cast(bitset_view.size()); original_nbits = static_cast(bitset_view.get_original_nbits()); - RAFT_LOG_INFO("Extracted bitset data: bitset_ptr=%p, bitset_len=%zu, original_nbits=%zu", - bitset_ptr, - static_cast(bitset_len), - static_cast(original_nbits)); - RAFT_LOG_INFO("InnerFilter type: %s, bitset_view.size() type: %s, SourceIndexT: %s", - typeid(InnerFilter).name(), - typeid(decltype(bitset_view.size())).name(), - typeid(SourceIndexT).name()); - } else { - RAFT_LOG_INFO("InnerFilter is not bitset_filter, skipping bitset extraction"); } - } else { - RAFT_LOG_INFO("Filter does not have wrapper members (.filter/.offset), skipping extraction"); } // Use common logic to compute launch config @@ -666,12 +632,6 @@ void select_and_run_jit( true /* persistent */); // Add device functions - // Register descriptor accessor fragments first (needed for void* descriptor access) - planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -684,7 +644,7 @@ void select_and_run_jit( dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); - planner.add_sample_filter_device_function(get_sample_filter_name(true)); + 
planner.add_sample_filter_device_function(get_sample_filter_name()); // Get launcher for persistent kernel auto launcher = planner.get_launcher(); @@ -734,12 +694,6 @@ void select_and_run_jit( dataset_desc.pq_len); // Add device functions (tags are determined inside the planner methods) - // Register descriptor accessor fragments first (needed for void* descriptor access) - planner.add_descriptor_accessor_device_functions(dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); planner.add_setup_workspace_device_function(dataset_desc.metric, dataset_desc.team_size, dataset_desc.dataset_block_dim, @@ -752,7 +706,7 @@ void select_and_run_jit( dataset_desc.is_vpq, dataset_desc.pq_bits, dataset_desc.pq_len); - planner.add_sample_filter_device_function(get_sample_filter_name(true)); + planner.add_sample_filter_device_function(get_sample_filter_name()); // Get launcher auto launcher = planner.get_launcher(); @@ -815,48 +769,7 @@ void select_and_run_jit( bitset_len, original_nbits); - // Check for launch errors immediately - cudaError_t launch_err = cudaPeekAtLastError(); - if (launch_err != cudaSuccess) { - RAFT_LOG_ERROR("[JIT LAUNCHER] SINGLE_CTA kernel launch error detected: %s (error code: %d)", - cudaGetErrorString(launch_err), - launch_err); - RAFT_LOG_ERROR( - "[JIT LAUNCHER] SINGLE_CTA parameters: graph_degree=%u, itopk_size=%u, num_queries=%u, " - "topk=%u", - graph_degree_u32, - itopk_size_u32, - num_queries, - topk); - RAFT_CUDA_TRY(launch_err); - } - - // Synchronize to catch kernel execution errors before they propagate - // This ensures the kernel completes before we return, preventing parameter lifetime issues - cudaError_t sync_err = cudaStreamSynchronize(stream); - if (sync_err != cudaSuccess) { - RAFT_LOG_ERROR("[JIT LAUNCHER] SINGLE_CTA kernel execution failed: %s (error code: %d)", - cudaGetErrorString(sync_err), - sync_err); - RAFT_LOG_ERROR( - "[JIT LAUNCHER] SINGLE_CTA 
parameters: graph_degree=%u, itopk_size=%u, num_queries=%u, " - "topk=%u, search_width=%u", - graph_degree_u32, - itopk_size_u32, - num_queries, - topk, - search_width_u32); - RAFT_LOG_ERROR( - "[JIT LAUNCHER] SINGLE_CTA pointers: topk_indices=%p, topk_distances=%p, graph=%p, " - "dev_desc=%p", - reinterpret_cast(topk_indices_ptr), - topk_distances_ptr, - graph.data_handle(), - dev_desc); - RAFT_CUDA_TRY(sync_err); - } - - RAFT_LOG_INFO("[JIT LAUNCHER] SINGLE_CTA kernel completed successfully"); + RAFT_CUDA_TRY(cudaPeekAtLastError()); } } diff --git a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp index 99050b089b..3f7659da19 100644 --- a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp +++ b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp @@ -60,21 +60,13 @@ struct is_bitset_filter -std::string get_sample_filter_name(bool debug_output = false) +std::string get_sample_filter_name() { using namespace cuvs::neighbors::filtering; using DecayedFilter = std::decay_t; - if (debug_output) { - std::cerr << "[JIT] get_sample_filter_name called" << std::endl; - std::cerr << "[JIT] Type name: " << typeid(DecayedFilter).name() << std::endl; - } - // First check for none_sample_filter (the only unwrapped case) - if constexpr (std::is_same_v) { - if (debug_output) { std::cerr << "[JIT] Returning: filter_none_ui" << std::endl; } - return "filter_none_ui"; - } + if constexpr (std::is_same_v) { return "filter_none_ui"; } // All other filters are wrapped in CagraSampleFilterWithQueryIdOffset // Access the inner filter type via decltype @@ -83,17 +75,11 @@ std::string get_sample_filter_name(bool debug_output = false) if constexpr (is_bitset_filter::value || std::is_same_v> || std::is_same_v>) { - if (debug_output) { - std::cerr << "[JIT] Returning: filter_bitset_ui (via wrapped filter)" << std::endl; - } return "filter_bitset_ui"; } } // Default to none filter for unknown types - if (debug_output) { - 
std::cerr << "[JIT] Returning: filter_none_ui (default/unknown)" << std::endl; - } return "filter_none_ui"; } From 75e26162b072532e2fbb357ce404d47fc660ac25 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 20 Feb 2026 06:44:10 +0000 Subject: [PATCH 119/158] Account for different QueryT --- .../modules/generate_jit_lto_kernels.cmake | 528 +++++++++++------- .../cagra/compute_distance_standard-impl.cuh | 33 +- .../cagra/compute_distance_vpq-impl.cuh | 15 +- .../apply_normalization_standard_cosine.cu.in | 8 +- ...ply_normalization_standard_cosine_impl.cuh | 6 +- .../apply_normalization_standard_noop.cu.in | 8 +- ...apply_normalization_standard_noop_impl.cuh | 4 +- .../jit_lto_kernels/compute_distance_impl.cuh | 28 - .../compute_distance_standard.cu.in | 15 +- .../compute_distance_standard_impl.cuh | 38 +- ...compute_distance_standard_impl_unified.cuh | 37 -- .../compute_distance_standard_unified.cu.in | 42 -- ...mpute_distance_to_child_nodes_kernel.cu.in | 12 +- .../compute_distance_vpq.cu.in | 10 +- .../compute_distance_vpq_impl.cuh | 51 +- .../compute_distance_vpq_impl_unified.cuh | 16 +- .../compute_distance_vpq_unified.cu.in | 43 -- .../jit_lto_kernels/device_common_jit.cuh | 14 +- .../cagra/jit_lto_kernels/dist_op.cu.in | 16 +- .../jit_lto_kernels/dist_op_cosine_impl.cuh | 5 +- .../jit_lto_kernels/dist_op_hamming_impl.cuh | 5 +- .../dist_op_inner_product_impl.cuh | 5 +- .../cagra/jit_lto_kernels/dist_op_l2_impl.cuh | 5 +- .../extern_device_functions.cuh | 86 +-- .../random_pickup_kernel.cu.in | 12 +- .../search_multi_cta_kernel.cu.in | 12 +- .../search_multi_cta_kernel_jit.cuh | 36 +- .../search_multi_cta_planner.hpp | 14 +- .../search_multi_kernel_jit.cuh | 21 +- .../search_multi_kernel_planner.hpp | 14 +- .../search_single_cta_kernel.cu.in | 12 +- .../search_single_cta_kernel_jit.cuh | 68 ++- .../search_single_cta_kernel_p.cu.in | 12 +- .../search_single_cta_planner.hpp | 15 +- .../jit_lto_kernels/setup_workspace_impl.cuh | 29 - 
.../setup_workspace_standard.cu.in | 11 +- .../setup_workspace_standard_impl.cuh | 55 +- .../setup_workspace_standard_impl_unified.cuh | 45 -- .../jit_lto_kernels/setup_workspace_vpq.cu.in | 5 +- .../setup_workspace_vpq_impl.cuh | 71 +-- .../setup_workspace_vpq_impl_unified.cuh | 52 -- 41 files changed, 697 insertions(+), 817 deletions(-) delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_impl.cuh delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl_unified.cuh delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl_unified.cuh diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 3ba140e337..c27b78dc04 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -241,21 +241,23 @@ function(generate_jit_lto_kernels target) set(cagra_source_index_abbrevs "ui") # Generate setup_workspace_standard fragments (one per team_size, dataset_block_dim, data_type, - # index_type, distance_type) Note: Metric is no longer a template parameter - it's linked via - # dist_op and normalization fragments + # index_type, distance_type, query_type) QueryT can be float (for most metrics) or uint8_t (for + # BitwiseHamming when DataT=uint8_t) foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN 
LISTS cagra_dataset_block_dims) - # setup_workspace_standard (no metric in name) + # Always generate QueryT=float fragment set(kernel_name - "setup_workspace_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + "setup_workspace_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_f" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") set(data_type "${data_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") + set(query_type "float") + set(query_type_abbrev "f") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( @@ -270,29 +272,58 @@ function(generate_jit_lto_kernels target) EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" EMBEDDED_ARRAY "embedded_${kernel_name}" ) + # For uint8_t data type, also generate QueryT=uint8_t fragment (for BitwiseHamming) + if(data_idx EQUAL 2) # uint8_t + set(kernel_name + "setup_workspace_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_uc" + ) + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(query_type "uint8_t") + set(query_type_abbrev "uc") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endif() 
endforeach() endforeach() endforeach() # Generate compute_distance_standard fragments (without metric - metric is handled via dist_op - # fragments) + # fragments) QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming when + # DataT=uint8_t) foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - # compute_distance_standard (no metric parameter - uses JIT-linked dist_op) + # Always generate QueryT=float fragment set(kernel_name - "compute_distance_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + "compute_distance_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_f" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") set(data_type "${data_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") + set(query_type "float") + set(query_type_abbrev "f") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in" "${filename}" @ONLY ) @@ -303,44 +334,70 @@ function(generate_jit_lto_kernels target) EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" EMBEDDED_ARRAY "embedded_${kernel_name}" ) + # For uint8_t data type, also generate QueryT=uint8_t fragment (for BitwiseHamming) + if(data_idx EQUAL 2) # uint8_t + set(kernel_name + "compute_distance_standard_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_uc" + ) + set(filename 
"${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(data_type "${data_type}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(query_type "uint8_t") + set(query_type_abbrev "uc") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endif() endforeach() endforeach() endforeach() - # Generate dist_op fragments for each metric Note: InnerProduct and CosineExpanded both use - # inner_product dist_op, so we only generate it once - foreach(data_idx IN ITEMS 0 1 2 3) - list(GET cagra_data_types ${data_idx} data_type) - list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) - # Generate dist_op fragments for unique metric tags only l2, inner_product (used by both ip and - # cos), hamming - set(dist_op_tags "l2" "inner_product" "hamming") - foreach(metric_tag IN LISTS dist_op_tags) - # Skip hamming for non-uint8_t types - if(metric_tag STREQUAL "hamming" AND NOT data_idx EQUAL 2) - continue() - endif() - # Generate dist_op fragment for this metric tag Note: dist_op only needs DataT and DistanceT, - # not IndexT - set(kernel_name "dist_op_${metric_tag}_${type_abbrev}_${cagra_distance_abbrev}") - set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - set(metric_tag "${metric_tag}") - set(data_type "${data_type}") - set(distance_type "${cagra_distance_type}") - set(type_abbrev "${type_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in" - "${filename}" @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) - endforeach() + # Generate dist_op fragments for each metric QueryT can be float (for most metrics) or uint8_t + # (for BitwiseHamming) DistanceT is always float Generate dist_op fragments for unique metric + # tags: l2, inner_product (used by both ip and cos), hamming + set(dist_op_tags "l2" "inner_product" "hamming") + foreach(metric_tag IN LISTS dist_op_tags) + if(metric_tag STREQUAL "hamming") + # BitwiseHamming uses QueryT=uint8_t + set(query_type "uint8_t") + set(query_type_abbrev "uc") + else() + # L2 and InnerProduct use QueryT=float + set(query_type "float") + set(query_type_abbrev "f") + endif() + # Generate dist_op fragment for this metric tag Note: dist_op uses QueryT and DistanceT, not + # DataT + set(kernel_name "dist_op_${metric_tag}_${query_type_abbrev}_${cagra_distance_abbrev}") + set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(metric_tag "${metric_tag}") + set(query_type "${query_type}") + set(query_type_abbrev "${query_type_abbrev}") + set(distance_type "${cagra_distance_type}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in" + "${filename}" @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_device_functions/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) endforeach() # Generate normalization fragments (no-op and cosine) These are used to normalize distances for @@ -411,6 +468,8 @@ 
function(generate_jit_lto_kernels target) # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore set(pq_bits "${cagra_pq_bits}") set(codebook_type "${cagra_codebook_type}") + set(query_type "half") + set(query_type_abbrev "h") set(data_type "${data_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") @@ -438,13 +497,15 @@ function(generate_jit_lto_kernels target) # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore set(pq_bits "${cagra_pq_bits}") set(codebook_type "${cagra_codebook_type}") + set(query_type "half") + set(query_type_abbrev "h") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") set(data_type "${data_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in" "${filename}" @ONLY ) @@ -489,69 +550,88 @@ function(generate_jit_lto_kernels target) # CAGRA only uses uint32_t as SourceIndexT set(source_index_type "uint32_t") set(src_idx_abbrev "ui") - # Regular kernel entrypoint (no metric in name) - set(kernel_name - "search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" - ) - set(filename - "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" - ) - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(pq_bits "0") - set(pq_len "0") - set(codebook_type "void") - set(pq_suffix "") - set(pq_prefix "") - set(codebook_tag "") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - 
set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) + # Generate QueryT variants: float for all, uint8_t only when DataT=uint8_t + set(query_type_variants "float") + set(query_type_abbrev_variants "f") + if(data_idx EQUAL 2) # uint8_t + list(APPEND query_type_variants "uint8_t") + list(APPEND query_type_abbrev_variants "uc") + endif() + foreach(query_idx IN ITEMS 0 1) + # Skip second iteration if we don't have uint8_t variant + if(query_idx EQUAL 1 AND NOT data_idx EQUAL 2) + break() + endif() + list(GET query_type_variants ${query_idx} query_type) + list(GET query_type_abbrev_variants ${query_idx} query_type_abbrev) + # Regular kernel entrypoint (no metric in name) + set(kernel_name + "search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${query_type_abbrev}_${src_idx_abbrev}" + ) + set(filename + "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" + ) + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") + set(query_type "${query_type}") + set(query_type_abbrev "${query_type_abbrev}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) - # Persistent kernel entrypoint (no metric in name) - set(kernel_name - "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" - ) - set(filename - "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" - ) - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(pq_bits "0") - set(pq_len "0") - set(codebook_type "void") - set(pq_suffix "") - set(pq_prefix "") - set(codebook_tag "") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) + # Persistent kernel entrypoint (no metric in name) + set(kernel_name + "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${query_type_abbrev}_${src_idx_abbrev}" + ) + set(filename + "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" + ) + 
set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") + set(query_type "${query_type}") + set(query_type_abbrev "${query_type_abbrev}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() # query_type variant endforeach() # dataset_block_dim endforeach() # team_size endforeach() # merge_idx @@ -583,7 +663,7 @@ function(generate_jit_lto_kernels target) # parameters distinguish VPQ Metric is no longer in the kernel name - VPQ only # supports L2Expanded set(kernel_name - "search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "search_single_cta_kernel_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_h_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" @@ -598,6 +678,8 @@ function(generate_jit_lto_kernels target) set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "") set(codebook_tag ", tag_codebook_half") + set(query_type "half") + set(query_type_abbrev "h") 
set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -619,7 +701,7 @@ function(generate_jit_lto_kernels target) # parameters distinguish VPQ Metric is no longer in the kernel name - VPQ only # supports L2Expanded set(kernel_name - "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "search_single_cta_kernel_p_${topk_by_bitonic_sort_str}_${bitonic_sort_and_merge_multi_warps_str}_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_h_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu" @@ -634,6 +716,8 @@ function(generate_jit_lto_kernels target) set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "") set(codebook_tag ", tag_codebook_half") + set(query_type "half") + set(query_type_abbrev "h") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -672,35 +756,52 @@ function(generate_jit_lto_kernels target) # CAGRA only uses uint32_t as SourceIndexT set(source_index_type "uint32_t") set(src_idx_abbrev "ui") - # Multi_cta kernel entrypoint (no metric in name) - set(kernel_name - "search_multi_cta_kernel_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(pq_bits "0") - set(pq_len "0") - set(codebook_type "void") - set(pq_suffix "") - set(pq_prefix "") - set(codebook_tag "") - set(index_type "${cagra_index_type}") - set(distance_type 
"${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) + # Generate QueryT variants: float for all, uint8_t only when DataT=uint8_t + set(query_type_variants "float") + set(query_type_abbrev_variants "f") + if(data_idx EQUAL 2) # uint8_t + list(APPEND query_type_variants "uint8_t") + list(APPEND query_type_abbrev_variants "uc") + endif() + foreach(query_idx IN ITEMS 0 1) + # Skip second iteration if we don't have uint8_t variant + if(query_idx EQUAL 1 AND NOT data_idx EQUAL 2) + break() + endif() + list(GET query_type_variants ${query_idx} query_type) + list(GET query_type_abbrev_variants ${query_idx} query_type_abbrev) + # Multi_cta kernel entrypoint (no metric in name) + set(kernel_name + "search_multi_cta_kernel_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${query_type_abbrev}_${src_idx_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") + set(query_type "${query_type}") + set(query_type_abbrev "${query_type_abbrev}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() # query_type variant endforeach() # dataset_block_dim endforeach() # team_size endforeach() # data_idx @@ -721,7 +822,7 @@ function(generate_jit_lto_kernels target) # Multi_cta VPQ kernel entrypoint Note: Metric is no longer in the kernel name - VPQ only # supports L2Expanded set(kernel_name - "search_multi_cta_kernel_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "search_multi_cta_kernel_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_h_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore @@ -733,6 +834,8 @@ function(generate_jit_lto_kernels target) set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "_vpq") set(codebook_tag ", tag_codebook_half") + set(query_type "half") + set(query_type_abbrev "h") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -761,69 +864,88 @@ function(generate_jit_lto_kernels target) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) - # random_pickup_kernel entrypoint (no metric in name) Note: random_pickup_kernel doesn't use - # SourceIndexT, so no loop needed - set(kernel_name - 
"random_pickup_kernel_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(pq_bits "0") - set(pq_len "0") - set(codebook_type "void") - set(pq_suffix "") - set(pq_prefix "") - set(codebook_tag "") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) + # Generate QueryT variants: float for all, uint8_t only when DataT=uint8_t + set(query_type_variants "float") + set(query_type_abbrev_variants "f") + if(data_idx EQUAL 2) # uint8_t + list(APPEND query_type_variants "uint8_t") + list(APPEND query_type_abbrev_variants "uc") + endif() + foreach(query_idx IN ITEMS 0 1) + # Skip second iteration if we don't have uint8_t variant + if(query_idx EQUAL 1 AND NOT data_idx EQUAL 2) + break() + endif() + list(GET query_type_variants ${query_idx} query_type) + list(GET query_type_abbrev_variants ${query_idx} query_type_abbrev) + # random_pickup_kernel entrypoint (no metric in name) Note: random_pickup_kernel doesn't + # use SourceIndexT, so no loop needed + set(kernel_name + "random_pickup_kernel_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${query_type_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(team_size 
"${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") + set(query_type "${query_type}") + set(query_type_abbrev "${query_type_abbrev}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) - # CAGRA only uses uint32_t as SourceIndexT - set(source_index_type "uint32_t") - set(src_idx_abbrev "ui") - # compute_distance_to_child_nodes_kernel entrypoint (no metric in name) - set(kernel_name - "compute_distance_to_child_nodes_kernel_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" - ) - set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") - set(team_size "${team_size}") - set(dataset_block_dim "${dataset_block_dim}") - set(pq_bits "0") - set(pq_len "0") - set(codebook_type "void") - set(pq_suffix "") - set(pq_prefix "") - set(codebook_tag "") - set(index_type "${cagra_index_type}") - set(distance_type "${cagra_distance_type}") - set(idx_abbrev "${cagra_index_abbrev}") - set(dist_abbrev "${cagra_distance_abbrev}") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in" - "${filename}" - @ONLY - ) - embed_jit_lto_fatbin( - FATBIN_TARGET "fatbin_${kernel_name}" - FATBIN_SOURCE "${filename}" - EMBEDDED_TARGET "${target}" - 
EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" - EMBEDDED_ARRAY "embedded_${kernel_name}" - ) + # CAGRA only uses uint32_t as SourceIndexT + set(source_index_type "uint32_t") + set(src_idx_abbrev "ui") + # compute_distance_to_child_nodes_kernel entrypoint (no metric in name) + set(kernel_name + "compute_distance_to_child_nodes_kernel_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${query_type_abbrev}_${src_idx_abbrev}" + ) + set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") + set(team_size "${team_size}") + set(dataset_block_dim "${dataset_block_dim}") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_suffix "") + set(pq_prefix "") + set(codebook_tag "") + set(query_type "${query_type}") + set(query_type_abbrev "${query_type_abbrev}") + set(index_type "${cagra_index_type}") + set(distance_type "${cagra_distance_type}") + set(idx_abbrev "${cagra_index_abbrev}") + set(dist_abbrev "${cagra_distance_abbrev}") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in" + "${filename}" + @ONLY + ) + embed_jit_lto_fatbin( + FATBIN_TARGET "fatbin_${kernel_name}" + FATBIN_SOURCE "${filename}" + EMBEDDED_TARGET "${target}" + EMBEDDED_HEADER "${generated_kernels_dir}/cagra_kernel_entrypoints/${kernel_name}.h" + EMBEDDED_ARRAY "embedded_${kernel_name}" + ) + endforeach() # query_type variant endforeach() # dataset_block_dim endforeach() # team_size endforeach() # data_idx @@ -840,7 +962,7 @@ function(generate_jit_lto_kernels target) # random_pickup_kernel VPQ entrypoint Note: Metric is no longer in the kernel name - VPQ # only supports L2Expanded set(kernel_name - "random_pickup_kernel_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" + 
"random_pickup_kernel_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_h" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore @@ -852,6 +974,8 @@ function(generate_jit_lto_kernels target) set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "_vpq") set(codebook_tag ", tag_codebook_half") + set(query_type "half") + set(query_type_abbrev "h") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") @@ -875,7 +999,7 @@ function(generate_jit_lto_kernels target) # compute_distance_to_child_nodes_kernel VPQ entrypoint Note: Metric is no longer in the # kernel name - VPQ only supports L2Expanded set(kernel_name - "compute_distance_to_child_nodes_kernel_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_${src_idx_abbrev}" + "compute_distance_to_child_nodes_kernel_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}_h_${src_idx_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_kernel_entrypoints/fatbin_${kernel_name}.cu") # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore @@ -887,6 +1011,8 @@ function(generate_jit_lto_kernels target) set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "_vpq") set(codebook_tag ", tag_codebook_half") + set(query_type "half") + set(query_type_abbrev "h") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh index 
281f6d98bb..bb8ea1382f 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_standard-impl.cuh @@ -21,17 +21,20 @@ namespace cuvs::neighbors::cagra::detail { // InnerProduct, etc.) The planner will link the appropriate fragment based on the metric Note: // extern functions cannot be constexpr, so we remove constexpr here Note: These are in the detail // namespace (not anonymous) so they can be found by JIT linking -template -extern __device__ DISTANCE_T dist_op(DATA_T a, DATA_T b); +// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) +template +extern __device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b); // Normalization is also JIT linked from fragments (no-op for most metrics, cosine normalization for // CosineExpanded) The planner will link the appropriate fragment (cosine or noop) based on the // metric +// QueryT is needed to match the descriptor template signature (always float for normalization) template + typename DistanceT, + typename QueryT> extern __device__ DistanceT apply_normalization_standard( DistanceT distance, const typename dataset_descriptor_base_t::args_t args, @@ -83,18 +86,20 @@ template struct standard_dataset_descriptor_t : public dataset_descriptor_base_t { using base_type = dataset_descriptor_base_t; -#if defined(CUVS_ENABLE_JIT_LTO) || defined(BUILD_KERNEL) - // When JIT LTO is enabled or building kernel fragments, Metric is not a template parameter - // QUERY_T is always float (BitwiseHamming uses uint8_t as DataT, but query is still float) - using QUERY_T = float; -#else +#if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) // When JIT LTO is disabled, Metric is a template parameter using QUERY_T = typename std:: conditional_t; +#else + // When JIT LTO is enabled, QueryT is passed as a template parameter + using QUERY_T = QueryT; #endif using base_type::args; using base_type::smem_ws_size_in_bytes; @@ -294,7 +299,8 @@ _RAFT_DEVICE 
__noinline__ auto compute_distance_standard( DescriptorT::kDatasetBlockDim, typename DescriptorT::DATA_T, typename DescriptorT::INDEX_T, - typename DescriptorT::DISTANCE_T>(distance, args, dataset_index); + typename DescriptorT::DISTANCE_T, + typename DescriptorT::QUERY_T>(distance, args, dataset_index); #else // When JIT LTO is disabled, kMetric is always available as a compile-time constant if constexpr (DescriptorT::kMetric == cuvs::distance::DistanceType::CosineExpanded) { @@ -343,8 +349,10 @@ RAFT_KERNEL __launch_bounds__(1, 1) dataset_norms); #else // When JIT LTO is enabled, Metric is not a template parameter + using query_t = + std::conditional_t; using desc_type = - standard_dataset_descriptor_t; + standard_dataset_descriptor_t; using base_type = typename desc_type::base_type; // For JIT, we don't use the function pointers, so set them to nullptr @@ -385,8 +393,11 @@ standard_descriptor_spec; using desc_type = - standard_dataset_descriptor_t; + standard_dataset_descriptor_t; using base_type = typename desc_type::base_type; #endif diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh index b51ae024e8..1cb593830d 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh @@ -26,12 +26,19 @@ template struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t { using base_type = dataset_descriptor_base_t; using CODE_BOOK_T = CodebookT; - using QUERY_T = half; +#if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) + using QUERY_T = half; +#else + using QUERY_T = QueryT; +#endif using base_type::args; using base_type::extra_ptr3; using typename base_type::args_t; @@ -388,6 +395,9 @@ RAFT_KERNEL __launch_bounds__(1, 1) #if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) , Metric +#else + , + half #endif >; using base_type = typename desc_type::base_type; @@ -458,6 +468,9 @@ 
vpq_descriptor_spec; using base_type = typename desc_type::base_type; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in index a2b4078249..1f0b47fc7b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in @@ -13,8 +13,9 @@ namespace cuvs::neighbors::cagra::detail { // Instantiate the cosine normalization function // This fragment provides apply_normalization_standard that normalizes by dataset norm +// QueryT is needed to match the descriptor template signature using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( +template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( @distance_type@, const args_t, @index_type@); } // namespace cuvs::neighbors::cagra::detail @@ -31,9 +32,12 @@ __attribute__((__constructor__)) static void register_apply_normalization_standa { // This fragment provides apply_normalization_standard (cosine normalization version) // The planner links the appropriate fragment (noop or cosine) based on metric + // QueryT is always float for normalization (only used for CosineExpanded which uses float queries) + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; registerAlgorithm( + tag_dist_@dist_abbrev@, + QueryTag>( "apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@", embedded_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, 
sizeof(embedded_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_impl.cuh index 38cf5a5de8..c691e58ef6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_impl.cuh @@ -12,11 +12,13 @@ namespace cuvs::neighbors::cagra::detail { // Cosine normalization fragment implementation // This provides apply_normalization_standard that normalizes by dataset norm (for CosineExpanded // metric) +// QueryT is needed to match the descriptor template signature, but not used in this function template + typename DistanceT, + typename QueryT> __device__ DistanceT apply_normalization_standard(DistanceT distance, const typename cuvs::neighbors::cagra::detail:: @@ -25,7 +27,7 @@ apply_normalization_standard(DistanceT distance, { // CosineExpanded normalization: divide by dataset norm const auto* dataset_norms = - standard_dataset_descriptor_t:: + standard_dataset_descriptor_t:: dataset_norms_ptr(args); auto norm = dataset_norms[dataset_index]; if (norm > 0) { distance = distance / norm; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in index 9b303d7420..0bc73c89c6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in @@ -13,8 +13,9 @@ namespace cuvs::neighbors::cagra::detail { // Instantiate the noop normalization function // This fragment provides apply_normalization_standard that does nothing +// QueryT is needed to 
match the descriptor template signature (always float for normalization) using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( +template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( @distance_type@, const args_t, @index_type@); } // namespace cuvs::neighbors::cagra::detail @@ -31,9 +32,12 @@ __attribute__((__constructor__)) static void register_apply_normalization_standa { // This fragment provides apply_normalization_standard (no-op version) // The planner links the appropriate fragment (noop or cosine) based on metric + // QueryT is always float for normalization (only used for CosineExpanded which uses float queries) + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; registerAlgorithm( + tag_dist_@dist_abbrev@, + QueryTag>( "apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@", embedded_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, sizeof(embedded_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_impl.cuh index eee4b6fa79..e9b9bc6556 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_impl.cuh @@ -11,11 +11,13 @@ namespace cuvs::neighbors::cagra::detail { // No-op normalization fragment implementation // This provides apply_normalization_standard that does nothing (for 
non-CosineExpanded metrics) +// QueryT is needed to match the descriptor template signature, but not used in this function template + typename DistanceT, + typename QueryT> __device__ DistanceT apply_normalization_standard(DistanceT distance, const typename cuvs::neighbors::cagra::detail:: diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_impl.cuh deleted file mode 100644 index 05ac642eac..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_impl.cuh +++ /dev/null @@ -1,28 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "../compute_distance.hpp" // For dataset_descriptor_base_t definition -#include "../device_common.hpp" - -namespace cuvs::neighbors::cagra::detail { - -// Unified compute_distance function - takes void* args and template parameters -// Standard and VPQ versions are in separate impl headers but use the same function name -// The planner links the appropriate fragment at runtime based on PQ_BITS/PQ_LEN -template -extern __device__ DistanceT -compute_distance(const typename dataset_descriptor_base_t::args_t args, - IndexT dataset_index); - -} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in index 31cfd6e010..aabb917ff6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in @@ -11,10 +11,11 @@ namespace cuvs::neighbors::cagra::detail { -// Instantiate the compute_distance_standard function for standard descriptor -// Note: Metric is no longer a template parameter - it's determined via JIT-linked dist_op fragments +// Instantiate the 
unified compute_distance function for standard descriptor +// PQ_BITS=0, PQ_LEN=0, CodebookT=void for standard descriptors +// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@>( +template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( const args_t, @index_type@); } // namespace cuvs::neighbors::cagra::detail @@ -23,18 +24,20 @@ template __device__ @distance_type@ compute_distance_standard<@team_size@, @data #include #include -#include "compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { + // This fragment provides compute_distance (standard version) + // The planner links the appropriate fragment (standard or VPQ) based on descriptor type registerAlgorithm( "compute_distance_standard_t@team_size@_dim@dataset_block_dim@", - embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, + 
sizeof(embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh index 1beff009b4..7aa1a12395 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl.cuh @@ -6,34 +6,34 @@ #pragma once #include "../compute_distance_standard-impl.cuh" -#include "../device_common.hpp" - -#include -#include -#include +#include "../device_common.hpp" // For dataset_descriptor_base_t namespace cuvs::neighbors::cagra::detail { -// Extern function implementation for compute_distance_standard (standard descriptor) -// Returns per-thread distance (team_sum must be called by the caller) -// Note: Metric is no longer a template parameter - it's determined via JIT-linked dist_op fragments +// Unified compute_distance implementation for standard descriptors +// This is instantiated when PQ_BITS=0, PQ_LEN=0, CodebookT=void +// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) template + typename DistanceT, + typename QueryT> __device__ DistanceT -compute_distance_standard(const typename cuvs::neighbors::cagra::detail:: - dataset_descriptor_base_t::args_t args, - IndexT dataset_index) +compute_distance(const typename dataset_descriptor_base_t::args_t args, + IndexT dataset_index) { - // Call the free function compute_distance_standard directly with args (already loaded) - // Returns per-thread distance (caller must do team_sum) - using desc_t = cuvs::neighbors::cagra::detail:: - standard_dataset_descriptor_t; - auto per_thread_distance = - cuvs::neighbors::cagra::detail::compute_distance_standard(args, dataset_index); - return per_thread_distance; + // For standard descriptors, PQ_BITS=0, 
PQ_LEN=0, CodebookT=void + static_assert(PQ_BITS == 0 && PQ_LEN == 0 && std::is_same_v, + "Standard descriptor requires PQ_BITS=0, PQ_LEN=0, CodebookT=void"); + + // Reconstruct the descriptor type with QueryT and call compute_distance_standard + using desc_t = + standard_dataset_descriptor_t; + return compute_distance_standard(args, dataset_index); } } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl_unified.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl_unified.cuh deleted file mode 100644 index 5c6d68948c..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_impl_unified.cuh +++ /dev/null @@ -1,37 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "../compute_distance_standard-impl.cuh" -#include "../device_common.hpp" // For dataset_descriptor_base_t -#include "compute_distance_impl.cuh" - -namespace cuvs::neighbors::cagra::detail { - -// Unified compute_distance implementation for standard descriptors -// This is instantiated when PQ_BITS=0, PQ_LEN=0, CodebookT=void -template -__device__ DistanceT -compute_distance(const typename dataset_descriptor_base_t::args_t args, - IndexT dataset_index) -{ - // For standard descriptors, PQ_BITS=0, PQ_LEN=0, CodebookT=void - static_assert(PQ_BITS == 0 && PQ_LEN == 0 && std::is_same_v, - "Standard descriptor requires PQ_BITS=0, PQ_LEN=0, CodebookT=void"); - - // Reconstruct the descriptor type and call compute_distance_standard - using desc_t = standard_dataset_descriptor_t; - return compute_distance_standard(args, dataset_index); -} - -} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in deleted file mode 100644 index 96faf933b3..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_unified.cu.in +++ /dev/null @@ -1,42 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the unified compute_distance function for standard descriptor -// PQ_BITS=0, PQ_LEN=0, CodebookT=void for standard descriptors -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>( - const args_t, @index_type@); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - // This fragment provides compute_distance (standard version) - // The planner links the appropriate fragment (standard or VPQ) based on descriptor type - registerAlgorithm( - "compute_distance_standard_t@team_size@_dim@dataset_block_dim@", - embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index 685d4cefdd..43fe29de11 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -12,11 +12,11 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { // Instantiate the compute_distance_to_child_nodes_kernel_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments // Note: Kernels use dataset_descriptor_base_t* pointer directly -template __global__ void compute_distance_to_child_nodes_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@, cuvs::neighbors::filtering::none_sample_filter>( +template __global__ void compute_distance_to_child_nodes_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@, cuvs::neighbors::filtering::none_sample_filter>( const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); } // 
namespace cuvs::neighbors::cagra::detail::multi_kernel_search @@ -25,7 +25,7 @@ template __global__ void compute_distance_to_child_nodes_kernel_jit<@team_size@, #include #include -#include "compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -36,8 +36,8 @@ __attribute__((__constructor__)) static void register_compute_distance_to_child_ tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@@codebook_tag@>( "compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, + sizeof(embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in index 18a36fb9ac..595127c538 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in @@ -11,11 +11,11 @@ namespace 
cuvs::neighbors::cagra::detail { -// Instantiate the compute_distance_vpq function for VPQ descriptor -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded -using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>; +// Instantiate the unified compute_distance function for VPQ descriptor +// PQ_BITS>0, PQ_LEN>0, CodebookT=half for VPQ descriptors +// QueryT is always half for VPQ using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance_vpq<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( +template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( const args_t, @index_type@); } // namespace cuvs::neighbors::cagra::detail @@ -30,6 +30,8 @@ using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { + // This fragment provides compute_distance (VPQ version) + // The planner links the appropriate fragment (standard or VPQ) based on descriptor type registerAlgorithm -#include -#include +#include "../device_common.hpp" // For dataset_descriptor_base_t namespace cuvs::neighbors::cagra::detail { -// Extern function implementation for compute_distance_vpq (VPQ descriptor) -// Returns per-thread distance (team_sum must be called by the caller) -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded +// Unified compute_distance implementation for VPQ descriptors +// This is instantiated when PQ_BITS>0, PQ_LEN>0, 
CodebookT=half +// QueryT is always half for VPQ template + typename DistanceT, + typename QueryT> __device__ DistanceT -compute_distance_vpq(const typename cuvs::neighbors::cagra::detail:: - dataset_descriptor_base_t::args_t args, - IndexT dataset_index) +compute_distance(const typename dataset_descriptor_base_t::args_t args, + IndexT dataset_index) { - // Call the free function compute_distance_vpq directly with args (already loaded) - // Returns per-thread distance (caller must do team_sum) - // VPQ only supports L2Expanded, so Metric is hardcoded - using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; - auto per_thread_distance = - cuvs::neighbors::cagra::detail::compute_distance_vpq(args, dataset_index); - return per_thread_distance; + // For VPQ descriptors, PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half + static_assert( + PQ_BITS > 0 && PQ_LEN > 0 && std::is_same_v && std::is_same_v, + "VPQ descriptor requires PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half"); + + // Reconstruct the descriptor type and call compute_distance_vpq + // QueryT is always half for VPQ + using desc_t = cagra_q_dataset_descriptor_t; + return compute_distance_vpq(args, dataset_index); } } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh index e43510b1f2..e21d48a2f1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh @@ -7,12 +7,12 @@ #include "../compute_distance_vpq-impl.cuh" #include "../device_common.hpp" // For dataset_descriptor_base_t -#include "compute_distance_impl.cuh" namespace cuvs::neighbors::cagra::detail { // Unified compute_distance implementation for VPQ descriptors // This is instantiated when PQ_BITS>0, PQ_LEN>0, CodebookT=half 
+// QueryT is always half for VPQ template + typename DistanceT, + typename QueryT> __device__ DistanceT compute_distance(const typename dataset_descriptor_base_t::args_t args, IndexT dataset_index) { - // For VPQ descriptors, PQ_BITS>0, PQ_LEN>0, CodebookT=half - static_assert(PQ_BITS > 0 && PQ_LEN > 0 && std::is_same_v, - "VPQ descriptor requires PQ_BITS>0, PQ_LEN>0, CodebookT=half"); + // For VPQ descriptors, PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half + static_assert( + PQ_BITS > 0 && PQ_LEN > 0 && std::is_same_v && std::is_same_v, + "VPQ descriptor requires PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half"); // Reconstruct the descriptor type and call compute_distance_vpq + // QueryT is always half for VPQ using desc_t = cagra_q_dataset_descriptor_t; + DistanceT, + QueryT>; return compute_distance_vpq(args, dataset_index); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in deleted file mode 100644 index d89dfeae67..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_unified.cu.in +++ /dev/null @@ -1,43 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -// Instantiate the unified compute_distance function for VPQ descriptor -// PQ_BITS>0, PQ_LEN>0, CodebookT=half for VPQ descriptors -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( - const args_t, @index_type@); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - // This fragment provides compute_distance (VPQ version) - // The planner links the appropriate fragment (standard or VPQ) based on descriptor type - registerAlgorithm( - "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh index dfe93d300f..983fe93fe3 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh @@ -32,7 +32,7 @@ inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; // JIT version of 
compute_distance_to_random_nodes - uses dataset_descriptor_base_t* pointer // Shared between single_cta and multi_cta JIT kernels -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT template + typename DataT, + typename QueryT> RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( IndexT* __restrict__ result_indices_ptr, // [num_pickup] DistanceT* __restrict__ result_distances_ptr, // [num_pickup] @@ -100,7 +101,8 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( CodebookT, DataT, IndexT, - DistanceT>(args, seed_index); + DistanceT, + QueryT>(args, seed_index); } // Now ALL threads in the team participate in team_sum const auto norm2_sum = device::team_sum(per_thread_norm2, team_size_bits); @@ -134,7 +136,7 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( // JIT version of compute_distance_to_child_nodes - uses dataset_descriptor_base_t* pointer // Shared between single_cta and multi_cta JIT kernels -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT template RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( IndexT* __restrict__ result_child_indices_ptr, @@ -224,7 +227,8 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( CodebookT, DataT, IndexT, - DistanceT>(args, child_id); + DistanceT, + QueryT>(args, child_id); } else { // Invalid child_id: lead lane gets upper_bound, others get 0 per_thread_dist = lead_lane ? 
raft::upper_bound() : 0; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in index 46314c3e7f..96d09d9ba1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in @@ -13,7 +13,9 @@ namespace cuvs::neighbors::cagra::detail { // Instantiate the dist_op function for the specific metric // Each fragment provides dist_op for a specific metric - planner links the appropriate one -template __device__ @distance_type@ dist_op<@data_type@, @distance_type@>(@data_type@, @data_type@); +// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) +// DistanceT is always float +template __device__ @distance_type@ dist_op<@query_type@, @distance_type@>(@query_type@, @query_type@); } // namespace cuvs::neighbors::cagra::detail @@ -21,17 +23,19 @@ template __device__ @distance_type@ dist_op<@data_type@, @distance_type@>(@data_ #include #include -#include "dist_op_@metric_tag@_@type_abbrev@_@dist_abbrev@.h" +#include "dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_dist_op_@metric_tag@_@type_abbrev@_@dist_abbrev@() +__attribute__((__constructor__)) static void register_dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@() { - registerAlgorithm( "dist_op_@metric_tag@", - embedded_dist_op_@metric_tag@_@type_abbrev@_@dist_abbrev@, - sizeof(embedded_dist_op_@metric_tag@_@type_abbrev@_@dist_abbrev@)); + embedded_dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@, + sizeof(embedded_dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh index aa29e475aa..908fc2600a 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh @@ -10,8 +10,9 @@ namespace cuvs::neighbors::cagra::detail { // dist_op fragment for CosineExpanded metric (same as InnerProduct) -template -__device__ DISTANCE_T dist_op(DATA_T a, DATA_T b) +// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) +template +__device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) { return -static_cast(a) * static_cast(b); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh index 728970492f..9dfb23001e 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh @@ -11,8 +11,9 @@ namespace cuvs::neighbors::cagra::detail { // dist_op fragment for BitwiseHamming metric -template -__device__ DISTANCE_T dist_op(DATA_T a, DATA_T b) +// QueryT is uint8_t for BitwiseHamming +template +__device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) { // mask the result of xor for the integer promotion const auto v = (a ^ b) & 0xffu; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh index bc2446316d..b02dc566f2 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh @@ -10,8 +10,9 @@ namespace cuvs::neighbors::cagra::detail { // dist_op fragment for InnerProduct metric -template -__device__ DISTANCE_T dist_op(DATA_T a, DATA_T b) +// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) +template +__device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) { return -static_cast(a) * static_cast(b); } diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh index 170d0969e9..31dac7d2ed 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh @@ -10,8 +10,9 @@ namespace cuvs::neighbors::cagra::detail { // dist_op fragment for L2Expanded metric -template -__device__ DISTANCE_T dist_op(DATA_T a, DATA_T b) +// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) +template +__device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) { DISTANCE_T diff = a - b; return diff * diff; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh index 737e466aac..2da95a015c 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -14,12 +14,13 @@ #include namespace cuvs::neighbors::cagra::detail { -// Forward declarations matching the JIT LTO version (no Metric parameter) +// Forward declarations matching the JIT LTO version (no Metric parameter, includes QueryT) template + typename DistanceT, + typename QueryT> struct standard_dataset_descriptor_t; template + typename DistanceT, + typename QueryT> struct cagra_q_dataset_descriptor_t; } // namespace cuvs::neighbors::cagra::detail @@ -41,6 +43,7 @@ namespace cuvs::neighbors::cagra::detail { // Unified setup_workspace and compute_distance extern functions // These take dataset_descriptor_base_t* and reconstruct the derived descriptor inside // Standard and VPQ versions are in separate impl headers but use the same function name +// QueryT is needed to reconstruct the descriptor type correctly template + typename DistanceT, + typename QueryT> extern __device__ dataset_descriptor_base_t* setup_workspace( 
dataset_descriptor_base_t* desc_ptr, void* smem, @@ -62,81 +66,11 @@ template + typename DistanceT, + typename QueryT> extern __device__ DistanceT compute_distance(const typename dataset_descriptor_base_t::args_t args, IndexT dataset_index); - -// Standard descriptor extern functions (kept for backward compatibility, but prefer unified -// versions) Note: Metric is no longer a template parameter - it's linked via dist_op and -// normalization fragments -template -extern __device__ const - standard_dataset_descriptor_t* - setup_workspace_standard( - const standard_dataset_descriptor_t* desc, - void* smem, - const DataT* queries, - uint32_t query_id); - -// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization -// fragments -template -extern __device__ DistanceT compute_distance_standard( - const typename dataset_descriptor_base_t::args_t args, - IndexT dataset_index); - -// VPQ descriptor extern functions -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded -template -extern __device__ const cagra_q_dataset_descriptor_t* -setup_workspace_vpq(const cagra_q_dataset_descriptor_t* desc, - void* smem, - const DataT* queries, - uint32_t query_id); - -// Note: Metric is no longer a template parameter - VPQ only supports L2Expanded -template -extern __device__ DistanceT compute_distance_vpq( - const typename dataset_descriptor_base_t::args_t args, - IndexT dataset_index); - } // namespace cuvs::neighbors::cagra::detail namespace cuvs::neighbors::detail { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in index c96bc04771..5cb740ac83 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -12,11 +12,11 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { // Instantiate the 
random_pickup_kernel_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments // Note: Kernels use dataset_descriptor_base_t* pointer directly -template __global__ void random_pickup_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( +template __global__ void random_pickup_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search @@ -25,7 +25,7 @@ template __global__ void random_pickup_kernel_jit<@team_size@, @dataset_block_di #include #include -#include "random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -36,8 +36,8 @@ __attribute__((__constructor__)) static void register_random_pickup_kernel@pq_pr tag_dist_@dist_abbrev@, tag_idx_@idx_abbrev@@codebook_tag@>( "random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - 
embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, + sizeof(embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index 0069b0a98f..d1042e0427 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -15,11 +15,11 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { // Instantiate the search_kernel_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments // Note: Kernels use dataset_descriptor_base_t* pointer directly -template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( +template __global__ void search_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, 
@source_index_type@>( @index_type@* const, @distance_type@* const, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::multi_cta_search @@ -28,7 +28,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@team_size #include #include -#include "search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -39,8 +39,8 @@ __attribute__((__constructor__)) static void register_search_multi_cta_kernel@pq tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@@codebook_tag@>( "search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, + 
sizeof(embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh index 1e79d54835..5a8b6079eb 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh @@ -43,7 +43,7 @@ using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_ji using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; // JIT version of search_kernel - uses dataset_descriptor_base_t* pointer -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT // Filter is linked separately via JIT LTO, so we use none_sample_filter directly template __global__ __launch_bounds__(1024, 1) void search_kernel_jit( IndexT* const result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] @@ -134,7 +135,8 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( CodebookT, DataT, IndexT, - DistanceT>(dataset_desc, smem, queries_ptr, query_id); + DistanceT, + QueryT>(dataset_desc, smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); @@ -173,20 +175,21 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( CodebookT, IndexT, DistanceT, - DataT>(result_indices_buffer, - result_distances_buffer, - smem_desc, - graph_degree, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - visited_hash_bitlen, - local_traversed_hashmap_ptr, - traversed_hash_bitlen, - block_id, - num_blocks); + DataT, + 
QueryT>(result_indices_buffer, + result_distances_buffer, + smem_desc, + graph_degree, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + visited_hash_bitlen, + local_traversed_hashmap_ptr, + traversed_hash_bitlen, + block_id, + num_blocks); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -274,6 +277,7 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( IndexT, DistanceT, DataT, + QueryT, 0>(result_indices_buffer, result_distances_buffer, smem_desc, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index 188a5e481d..79b17b41f6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -119,7 +119,14 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; default: metric_tag = "unknown"; break; } - auto params = make_fragment_key(); + // QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) + // DistanceT is always float + std::string params; + if (metric == cuvs::distance::DistanceType::BitwiseHamming) { + params = make_fragment_key(); + } else { + params = make_fragment_key(); + } std::string key = "dist_op_" + metric_tag + "_" + params; this->device_functions.push_back(key); } @@ -137,7 +144,10 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { } else { normalization_type = "noop"; } - auto params = make_fragment_key(); + // QueryT is always float for normalization (only used for CosineExpanded which uses float + // queries) + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; // Always float for normalization + auto params = make_fragment_key(); std::string key = "apply_normalization_standard_" + normalization_type; key += "_t" + std::to_string(team_size); 
key += "_dim" + std::to_string(dataset_block_dim); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh index 511612e0b8..6368a21f28 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh @@ -37,7 +37,7 @@ template inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; // JIT version of random_pickup_kernel - uses dataset_descriptor_base_t* pointer -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT template + typename DistanceT, + typename QueryT> RAFT_KERNEL random_pickup_kernel_jit( dataset_descriptor_base_t* dataset_desc, const DataT* const queries_ptr, // [num_queries, dataset_dim] @@ -84,7 +85,8 @@ RAFT_KERNEL random_pickup_kernel_jit( CodebookT, DataT, IndexT, - DistanceT>(dataset_desc, smem, queries_ptr, query_id); + DistanceT, + QueryT>(dataset_desc, smem, queries_ptr, query_id); __syncthreads(); // Load args once for better performance (avoid repeated loads in the loop) @@ -116,7 +118,8 @@ RAFT_KERNEL random_pickup_kernel_jit( CodebookT, DataT, IndexT, - DistanceT>(args, seed_index); + DistanceT, + QueryT>(args, seed_index); // Now ALL threads in the team participate in team_sum const auto norm2 = device::team_sum(per_thread_norm2, team_size_bits); @@ -140,7 +143,8 @@ RAFT_KERNEL random_pickup_kernel_jit( } // JIT version of compute_distance_to_child_nodes_kernel - uses extern functions with void* -// descriptor Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT +// descriptor Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, +// QueryT template RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( @@ -193,7 +198,8 @@ RAFT_KERNEL 
compute_distance_to_child_nodes_kernel_jit( CodebookT, DataT, IndexT, - DistanceT>(dataset_desc, smem, query_ptr, query_id); + DistanceT, + QueryT>(dataset_desc, smem, query_ptr, query_id); __syncthreads(); if (global_team_id >= search_width * graph_degree) { return; } @@ -237,7 +243,8 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( CodebookT, DataT, IndexT, - DistanceT>(args, child_id); + DistanceT, + QueryT>(args, child_id); } // Now ALL threads in the team participate in team_sum DISTANCE_T norm2 = device::team_sum(per_thread_norm2, team_size_bits); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index ba44a5d9cf..d54cbca3b9 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -122,7 +122,14 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; default: metric_tag = "unknown"; break; } - auto params = make_fragment_key(); + // QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) + // DistanceT is always float + std::string params; + if (metric == cuvs::distance::DistanceType::BitwiseHamming) { + params = make_fragment_key(); + } else { + params = make_fragment_key(); + } std::string key = "dist_op_" + metric_tag + "_" + params; this->device_functions.push_back(key); } @@ -140,7 +147,10 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { } else { normalization_type = "noop"; } - auto params = make_fragment_key(); + // QueryT is always float for normalization (only used for CosineExpanded which uses float + // queries) + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; // Always float for normalization + auto params = make_fragment_key(); std::string key = 
"apply_normalization_standard_" + normalization_type; key += "_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in index 24fb9f0dab..7bf948edf7 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -12,11 +12,11 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { // Instantiate the search_kernel_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments // Note: Kernels use dataset_descriptor_base_t* pointer directly -template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( +template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@>( uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const 
std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search @@ -25,7 +25,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_b #include #include -#include "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -36,8 +36,8 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_@ tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@@codebook_tag@>( "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + 
embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh index 0162f24db3..c9b42fffc8 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -85,9 +85,9 @@ using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_ji using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; // JIT version of search_core - uses dataset_descriptor_base_t* pointer -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for +// BitwiseHamming) For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half template RAFT_DEVICE_INLINE_FUNCTION void search_core( uintptr_t result_indices_ptr, @@ -174,7 +175,8 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( CodebookT, DataT, IndexT, - DistanceT>(dataset_desc, smem, queries_ptr, query_id); + DistanceT, + QueryT>(dataset_desc, smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + 
smem_ws_size_in_bytes); @@ -219,18 +221,19 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( CodebookT, IndexT, DistanceT, - DataT>(result_indices_buffer, - result_distances_buffer, - smem_desc, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen, - (IndexT*)nullptr, - 0); + DataT, + QueryT>(result_indices_buffer, + result_distances_buffer, + smem_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -345,18 +348,19 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( CodebookT, IndexT, DistanceT, - DataT>(result_indices_buffer + internal_topk, - result_distances_buffer + internal_topk, - smem_desc, - knn_graph, - graph_degree, - local_visited_hashmap_ptr, - hash_bitlen, - (IndexT*)nullptr, - 0, - parent_list_buffer, - result_indices_buffer, - search_width); + DataT, + QueryT>(result_indices_buffer + internal_topk, + result_distances_buffer + internal_topk, + smem_desc, + knn_graph, + graph_degree, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0, + parent_list_buffer, + result_indices_buffer, + search_width); // Critical: __syncthreads() must be reached by ALL threads // If any thread is stuck in compute_distance_to_child_nodes_jit, this will hang __syncthreads(); @@ -525,7 +529,7 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( } // JIT kernel wrapper - calls search_core -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT template RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_jit( uintptr_t result_indices_ptr, @@ -577,6 +582,7 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_jit( DataT, IndexT, DistanceT, + QueryT, SourceIndexT>(result_indices_ptr, 
result_distances_ptr, top_k, @@ -617,7 +623,7 @@ struct job_desc_jit_helper_desc { }; // JIT persistent kernel - uses extern functions and JIT search_core -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT +// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT template RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_p_jit( worker_handle_t* worker_handles, @@ -709,6 +716,7 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_p_jit( DataT, IndexT, DistanceT, + QueryT, SourceIndexT>(result_indices_ptr, result_distances_ptr, top_k, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in index 41eb6bd076..5bec86da8d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in @@ -12,11 +12,11 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { // Instantiate the search_kernel_p_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half +// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) +// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half // Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments // Note: Kernels use dataset_descriptor_base_t* pointer directly -template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( +template __global__ void search_kernel_p_jit<@topk_by_bitonic_sort@, 
@bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@>( worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search @@ -25,7 +25,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_p_jit<@topk_by #include #include -#include "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" +#include "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -36,8 +36,8 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_p tag_dist_@dist_abbrev@, tag_idx_@src_idx_abbrev@@codebook_tag@>( "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - 
sizeof(embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); + embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, + sizeof(embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index c482d2b593..a6e75eb237 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -130,7 +130,14 @@ struct CagraSearchPlanner : AlgorithmPlanner { case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; default: metric_tag = "unknown"; break; } - auto params = make_fragment_key(); + // QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) + // DistanceT is always float + std::string params; + if (metric == cuvs::distance::DistanceType::BitwiseHamming) { + params = make_fragment_key(); + } else { + params = make_fragment_key(); + } std::string key = "dist_op_" + metric_tag + "_" + params; this->device_functions.push_back(key); } @@ -142,13 +149,17 @@ struct CagraSearchPlanner : AlgorithmPlanner { // Both cosine and noop fragments provide the same function name "apply_normalization_standard" // but register with different fragment names. The planner links the appropriate one based on // metric. 
+ // QueryT is always float for normalization (only used for CosineExpanded which uses float + // queries) std::string normalization_type; if (metric == cuvs::distance::DistanceType::CosineExpanded) { normalization_type = "cosine"; } else { normalization_type = "noop"; } - auto params = make_fragment_key(); + // For lookup, we need to manually append fragment key parameters + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; // Always float for normalization + auto params = make_fragment_key(); std::string key = "apply_normalization_standard_" + normalization_type; key += "_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh deleted file mode 100644 index a4ffccfb60..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_impl.cuh +++ /dev/null @@ -1,29 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
- * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "../device_common.hpp" - -namespace cuvs::neighbors::cagra::detail { - -// Unified setup_workspace function - takes dataset_descriptor_base_t* and template parameters -// Standard and VPQ versions are in separate impl headers but use the same function name -// The planner links the appropriate fragment at runtime based on PQ_BITS/PQ_LEN -template -extern __device__ dataset_descriptor_base_t* setup_workspace( - dataset_descriptor_base_t* desc_ptr, - void* smem, - const DataT* queries, - uint32_t query_id); - -} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in index ffb86f7696..1d23ccc218 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in @@ -7,13 +7,14 @@ #ifdef BUILD_KERNEL -#include +#include namespace cuvs::neighbors::cagra::detail { // Instantiate the unified setup_workspace function for standard descriptor // PQ_BITS=0, PQ_LEN=0, CodebookT=void for standard descriptors -template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@>( +// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) +template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail @@ -22,7 
+23,7 @@ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@d #include #include -#include "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; @@ -34,8 +35,8 @@ __attribute__((__constructor__)) static void register_setup_workspace_standard_t tag_idx_@idx_abbrev@, tag_dist_@dist_abbrev@>( "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@", - embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, + sizeof(embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh index 39280ca238..61d152623d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl.cuh @@ -8,41 +8,40 @@ #include "../compute_distance_standard-impl.cuh" #include "../device_common.hpp" -#include -#include -#include - namespace cuvs::neighbors::cagra::detail { -// Extern function implementation for setup_workspace_standard (standard descriptor) -// Takes the concrete descriptor pointer and calls the free function directly (not through function -// pointer) For JIT LTO, the descriptor's setup_workspace_impl is nullptr, so we must call the free -// 
function directly -// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization -// fragments +// Unified setup_workspace implementation for standard descriptors +// This is instantiated when PQ_BITS=0, PQ_LEN=0, CodebookT=void +// Takes dataset_descriptor_base_t* and reconstructs the derived descriptor inside +// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) template -__device__ const cuvs::neighbors::cagra::detail:: - standard_dataset_descriptor_t* - setup_workspace_standard( - const cuvs::neighbors::cagra::detail:: - standard_dataset_descriptor_t* desc, - void* smem, - const DataT* queries, - uint32_t query_id) + typename DistanceT, + typename QueryT> +__device__ dataset_descriptor_base_t* setup_workspace( + dataset_descriptor_base_t* desc_ptr, + void* smem, + const DataT* queries, + uint32_t query_id) { - // CRITICAL: This function uses __syncthreads() and expects ALL threads to call it - // If only thread 0 calls it, __syncthreads() will hang forever - // Call the free function directly (not desc->setup_workspace() which uses a function pointer) - // The free function is in compute_distance_standard-impl.cuh - using desc_t = cuvs::neighbors::cagra::detail:: - standard_dataset_descriptor_t; - const auto* result = - cuvs::neighbors::cagra::detail::setup_workspace_standard(desc, smem, queries, query_id); - return result; + // For standard descriptors, PQ_BITS=0, PQ_LEN=0, CodebookT=void + static_assert(PQ_BITS == 0 && PQ_LEN == 0 && std::is_same_v, + "Standard descriptor requires PQ_BITS=0, PQ_LEN=0, CodebookT=void"); + + // Reconstruct the descriptor pointer from base pointer with QueryT + using desc_t = + standard_dataset_descriptor_t; + const desc_t* desc = static_cast(desc_ptr); + + // Call the free function directly - it takes DescriptorT as template parameter + const desc_t* result = setup_workspace_standard(desc, smem, queries, query_id); + return const_cast*>( + static_cast*>(result)); } } // 
namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh deleted file mode 100644 index 539dbf1fdb..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_impl_unified.cuh +++ /dev/null @@ -1,45 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "../compute_distance_standard-impl.cuh" -#include "../device_common.hpp" -#include "setup_workspace_impl.cuh" - -namespace cuvs::neighbors::cagra::detail { - -// Unified setup_workspace implementation for standard descriptors -// This is instantiated when PQ_BITS=0, PQ_LEN=0, CodebookT=void -// Takes dataset_descriptor_base_t* and reconstructs the derived descriptor inside -template -__device__ dataset_descriptor_base_t* setup_workspace( - dataset_descriptor_base_t* desc_ptr, - void* smem, - const DataT* queries, - uint32_t query_id) -{ - // For standard descriptors, PQ_BITS=0, PQ_LEN=0, CodebookT=void - static_assert(PQ_BITS == 0 && PQ_LEN == 0 && std::is_same_v, - "Standard descriptor requires PQ_BITS=0, PQ_LEN=0, CodebookT=void"); - - // Reconstruct the descriptor pointer from base pointer - using desc_t = standard_dataset_descriptor_t; - const desc_t* desc = static_cast(desc_ptr); - - // Call the free function directly - it takes DescriptorT as template parameter - const desc_t* result = setup_workspace_standard(desc, smem, queries, query_id); - return const_cast*>( - static_cast*>(result)); -} - -} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in index 694697c075..d608c10506 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in @@ -7,13 +7,14 @@ #ifdef BUILD_KERNEL -#include +#include namespace cuvs::neighbors::cagra::detail { // Instantiate the unified setup_workspace function for VPQ descriptor // PQ_BITS>0, PQ_LEN>0, CodebookT=half for VPQ descriptors -template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@>( +// QueryT is always half for VPQ +template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh index 0c300911bf..a6c6956066 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl.cuh @@ -8,17 +8,12 @@ #include "../compute_distance_vpq-impl.cuh" #include "../device_common.hpp" -#include -#include -#include - namespace cuvs::neighbors::cagra::detail { -// Extern function implementation for setup_workspace_vpq (VPQ descriptor) -// Takes the concrete descriptor pointer and calls the free function directly (not through function -// pointer) For JIT LTO, the descriptor's setup_workspace_impl is nullptr, so we must call the free -// function directly -// Note: Metric is no longer a 
template parameter - VPQ only supports L2Expanded +// Unified setup_workspace implementation for VPQ descriptors +// This is instantiated when PQ_BITS>0, PQ_LEN>0, CodebookT=half +// Takes dataset_descriptor_base_t* and reconstructs the derived descriptor inside +// QueryT is always half for VPQ template -__device__ const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t* -setup_workspace_vpq( - const cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t* desc, + typename DistanceT, + typename QueryT> +__device__ dataset_descriptor_base_t* setup_workspace( + dataset_descriptor_base_t* desc_ptr, void* smem, const DataT* queries, uint32_t query_id) { - // Call the free function directly (not desc->setup_workspace() which uses a function pointer) - // The free function is in compute_distance_vpq-impl.cuh - // VPQ only supports L2Expanded, so Metric is hardcoded - using desc_t = cuvs::neighbors::cagra::detail::cagra_q_dataset_descriptor_t; - const auto* result = - cuvs::neighbors::cagra::detail::setup_workspace_vpq(desc, smem, queries, query_id); - return result; + // For VPQ descriptors, PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half + static_assert( + PQ_BITS > 0 && PQ_LEN > 0 && std::is_same_v && std::is_same_v, + "VPQ descriptor requires PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half"); + + // Reconstruct the descriptor pointer from base pointer + // QueryT is always half for VPQ + using desc_t = cagra_q_dataset_descriptor_t; + const desc_t* desc = static_cast(desc_ptr); + + // Call the free function directly - it takes DescriptorT as template parameter + const desc_t* result = setup_workspace_vpq(desc, smem, queries, query_id); + return const_cast*>( + static_cast*>(result)); } } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl_unified.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl_unified.cuh deleted file mode 100644 index 
ac388ad341..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_impl_unified.cuh +++ /dev/null @@ -1,52 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "../compute_distance_vpq-impl.cuh" -#include "../device_common.hpp" -#include "setup_workspace_impl.cuh" - -namespace cuvs::neighbors::cagra::detail { - -// Unified setup_workspace implementation for VPQ descriptors -// This is instantiated when PQ_BITS>0, PQ_LEN>0, CodebookT=half -// Takes dataset_descriptor_base_t* and reconstructs the derived descriptor inside -template -__device__ dataset_descriptor_base_t* setup_workspace( - dataset_descriptor_base_t* desc_ptr, - void* smem, - const DataT* queries, - uint32_t query_id) -{ - // For VPQ descriptors, PQ_BITS>0, PQ_LEN>0, CodebookT=half - static_assert(PQ_BITS > 0 && PQ_LEN > 0 && std::is_same_v, - "VPQ descriptor requires PQ_BITS>0, PQ_LEN>0, CodebookT=half"); - - // Reconstruct the descriptor pointer from base pointer - using desc_t = cagra_q_dataset_descriptor_t; - const desc_t* desc = static_cast(desc_ptr); - - // Call the free function directly - it takes DescriptorT as template parameter - const desc_t* result = setup_workspace_vpq(desc, smem, queries, query_id); - return const_cast*>( - static_cast*>(result)); -} - -} // namespace cuvs::neighbors::cagra::detail From 1ccb01c7c90f6c231b808c63835af37b7ae9d399 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 20 Feb 2026 20:17:08 +0000 Subject: [PATCH 120/158] cleanup some stuff --- .../cuvs/detail/jit_lto/AlgorithmLauncher.hpp | 15 ---- .../jit_lto/cagra/search_single_cta_tags.hpp | 49 ------------ .../ivf_flat/interleaved_scan_tags.hpp | 48 ------------ .../cuvs/detail/jit_lto/registration_tags.hpp | 77 +++++++++++++++++++ cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 34 +------- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 45 ++--------- 
.../jit_lto_kernels/apply_filter_kernel.cu.in | 2 +- .../apply_normalization_standard_cosine.cu.in | 8 +- .../apply_normalization_standard_noop.cu.in | 8 +- .../compute_distance_standard.cu.in | 11 +-- ...mpute_distance_to_child_nodes_kernel.cu.in | 2 +- .../compute_distance_vpq.cu.in | 11 +-- .../cagra/jit_lto_kernels/dist_op.cu.in | 8 +- .../random_pickup_kernel.cu.in | 2 +- .../search_multi_cta_kernel.cu.in | 2 +- .../search_multi_cta_planner.hpp | 58 +++++++------- .../search_multi_kernel_planner.hpp | 59 +++++++------- .../search_single_cta_kernel.cu.in | 2 +- .../search_single_cta_kernel_p.cu.in | 2 +- .../search_single_cta_planner.hpp | 61 +++++++-------- .../setup_workspace_standard.cu.in | 11 +-- .../jit_lto_kernels/setup_workspace_vpq.cu.in | 11 +-- .../search_multi_cta_kernel_launcher_jit.cuh | 2 +- .../detail/cagra/search_multi_kernel.cuh | 2 +- .../search_single_cta_kernel_launcher_jit.cuh | 2 +- .../detail/cagra/shared_launcher_jit.hpp | 2 +- .../ivf_flat_interleaved_scan_jit.cuh | 17 ++-- .../interleaved_scan_kernel.cu.in | 2 +- .../ivf_flat/jit_lto_kernels/metric.cu.in | 2 +- 29 files changed, 205 insertions(+), 350 deletions(-) delete mode 100644 cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp delete mode 100644 cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp create mode 100644 cpp/include/cuvs/detail/jit_lto/registration_tags.hpp diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp index 3d66739ce2..6f551170c4 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp @@ -21,31 +21,16 @@ struct AlgorithmLauncher { ~AlgorithmLauncher(); - // Delete copy constructor and assignment to prevent accidental copying AlgorithmLauncher(const AlgorithmLauncher&) = delete; AlgorithmLauncher& operator=(const AlgorithmLauncher&) = delete; - // Allow move constructor and assignment 
AlgorithmLauncher(AlgorithmLauncher&& other) noexcept; AlgorithmLauncher& operator=(AlgorithmLauncher&& other) noexcept; template void dispatch(cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, Args&&... args) { - // Create array of pointers to arguments - // NOTE: cudaLaunchKernelExC copies the parameter values synchronously before returning, - // so the local array and argument references are safe even though the kernel launch is async void* kernel_args[] = {const_cast(static_cast(&args))...}; - - // Validate that we're not passing null pointers for critical parameters - // (This is a sanity check - actual validation should be done by callers) - for (size_t i = 0; i < sizeof...(args); ++i) { - if (kernel_args[i] == nullptr) { - // Some parameters might legitimately be nullptr, so we just log a warning - // The kernel itself should validate critical pointers - } - } - this->call(stream, grid, block, shared_mem, kernel_args); } diff --git a/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp b/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp deleted file mode 100644 index 2db9a35583..0000000000 --- a/cpp/include/cuvs/detail/jit_lto/cagra/search_single_cta_tags.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
- * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -namespace cuvs::neighbors::cagra::detail { - -// Tag types for data types -struct tag_f {}; // float -struct tag_h {}; // __half -struct tag_sc {}; // int8_t -struct tag_uc {}; // uint8_t - -// Tag types for index types -struct tag_idx_ui {}; // uint32_t -struct tag_idx_l {}; // int64_t - -// Tag types for distance types -struct tag_dist_f {}; // float - -// Tag types for distance metrics -struct tag_metric_l2 {}; -struct tag_metric_inner_product {}; -struct tag_metric_cosine {}; -struct tag_metric_hamming {}; - -// Tag types for team sizes -struct tag_team_8 {}; -struct tag_team_16 {}; -struct tag_team_32 {}; - -// Tag types for dataset block dimensions -struct tag_dim_128 {}; -struct tag_dim_256 {}; -struct tag_dim_512 {}; - -// Tag types for sample filter types -struct tag_filter_none {}; -struct tag_filter_bitset {}; - -// Tag types for VPQ parameters -struct tag_pq_bits_8 {}; -struct tag_pq_len_2 {}; -struct tag_pq_len_4 {}; -struct tag_codebook_half {}; - -} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp b/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp deleted file mode 100644 index 19ded7e8ad..0000000000 --- a/cpp/include/cuvs/detail/jit_lto/ivf_flat/interleaved_scan_tags.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
- * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -namespace cuvs::neighbors::ivf_flat::detail { - -// Tag types for data types -struct tag_f {}; -struct tag_h {}; -struct tag_sc {}; -struct tag_uc {}; - -// Tag types for accumulator types -struct tag_acc_f {}; -struct tag_acc_h {}; -struct tag_acc_i {}; -struct tag_acc_ui {}; - -// Tag types for index types -struct tag_idx_l {}; - -// Tag types for filter subtypes -struct tag_filter_bitset_impl {}; -struct tag_filter_none_impl {}; - -// Tag types for sample filter types with full template info -template -struct tag_filter {}; - -// Tag types for distance metrics with full template info -template -struct tag_metric_euclidean {}; - -template -struct tag_metric_inner_product {}; - -template -struct tag_metric_custom_udf {}; - -// Tag types for post-processing -struct tag_post_identity {}; -struct tag_post_sqrt {}; -struct tag_post_compose {}; - -} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/include/cuvs/detail/jit_lto/registration_tags.hpp b/cpp/include/cuvs/detail/jit_lto/registration_tags.hpp new file mode 100644 index 0000000000..b9d244f799 --- /dev/null +++ b/cpp/include/cuvs/detail/jit_lto/registration_tags.hpp @@ -0,0 +1,77 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +namespace cuvs::detail::jit_lto { + +struct tag_f {}; +struct tag_h {}; +struct tag_sc {}; +struct tag_uc {}; +struct tag_idx_l {}; +struct tag_filter_none {}; +struct tag_filter_bitset {}; + +} // namespace cuvs::detail::jit_lto + +namespace cuvs::neighbors::cagra::detail { + +using cuvs::detail::jit_lto::tag_f; +using cuvs::detail::jit_lto::tag_filter_bitset; +using cuvs::detail::jit_lto::tag_filter_none; +using cuvs::detail::jit_lto::tag_h; +using cuvs::detail::jit_lto::tag_idx_l; +using cuvs::detail::jit_lto::tag_sc; +using cuvs::detail::jit_lto::tag_uc; + +struct tag_idx_ui {}; +struct tag_dist_f {}; +struct tag_metric_l2 {}; +struct tag_metric_inner_product {}; +struct tag_metric_cosine {}; +struct tag_metric_hamming {}; +struct tag_team_8 {}; +struct tag_team_16 {}; +struct tag_team_32 {}; +struct tag_dim_128 {}; +struct tag_dim_256 {}; +struct tag_dim_512 {}; +struct tag_pq_bits_8 {}; +struct tag_pq_len_2 {}; +struct tag_pq_len_4 {}; +struct tag_codebook_half {}; + +} // namespace cuvs::neighbors::cagra::detail + +namespace cuvs::neighbors::ivf_flat::detail { + +using cuvs::detail::jit_lto::tag_f; +using cuvs::detail::jit_lto::tag_filter_bitset; +using cuvs::detail::jit_lto::tag_filter_none; +using cuvs::detail::jit_lto::tag_h; +using cuvs::detail::jit_lto::tag_idx_l; +using cuvs::detail::jit_lto::tag_sc; +using cuvs::detail::jit_lto::tag_uc; + +struct tag_acc_f {}; +struct tag_acc_h {}; +struct tag_acc_i {}; +struct tag_acc_ui {}; + +template +struct tag_metric_euclidean {}; + +template +struct tag_metric_inner_product {}; + +template +struct tag_metric_custom_udf {}; + +struct tag_post_identity {}; +struct tag_post_sqrt {}; +struct tag_post_compose {}; + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 8114095fdd..8dda21c69a 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ 
b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -5,8 +5,6 @@ #include -#include - #include AlgorithmLauncher::AlgorithmLauncher(cudaKernel_t k, cudaLibrary_t lib) : kernel{k}, library{lib} {} @@ -26,7 +24,6 @@ AlgorithmLauncher::AlgorithmLauncher(AlgorithmLauncher&& other) noexcept AlgorithmLauncher& AlgorithmLauncher::operator=(AlgorithmLauncher&& other) noexcept { if (this != &other) { - // Unload current library if it exists if (library != nullptr) { cudaLibraryUnload(library); } kernel = other.kernel; library = other.library; @@ -39,17 +36,10 @@ AlgorithmLauncher& AlgorithmLauncher::operator=(AlgorithmLauncher&& other) noexc void AlgorithmLauncher::call( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) { - // Validate kernel and library handles before use if (kernel == nullptr) { RAFT_FAIL("AlgorithmLauncher::call - kernel is NULL!"); } if (library == nullptr) { RAFT_FAIL("AlgorithmLauncher::call - library is NULL!"); } if (kernel_args == nullptr) { RAFT_FAIL("AlgorithmLauncher::call - kernel_args is NULL!"); } - // Debug: verify kernel is being called - std::cerr << "[JIT] AlgorithmLauncher::call - kernel is not null, launching with grid=(" << grid.x - << "," << grid.y << "," << grid.z << ") block=(" << block.x << "," << block.y << "," - << block.z << ")" << std::endl; - std::cerr.flush(); - cudaLaunchAttribute attribute[1]; attribute[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; attribute[0].val.programmaticStreamSerializationAllowed = 1; @@ -62,29 +52,7 @@ void AlgorithmLauncher::call( config.numAttrs = 1; config.dynamicSmemBytes = shared_mem; - std::cerr << "[JIT] AlgorithmLauncher::call - About to launch kernel" << std::endl; - std::cerr.flush(); - - // NOTE: cudaLaunchKernelExC copies parameter values synchronously before returning, - // so the kernel_args array and the values it points to are safe even though the launch is async - cudaError_t err = cudaLaunchKernelExC(&config, kernel, kernel_args); - 
if (err != cudaSuccess) { - std::cerr << "[JIT] ERROR: cudaLaunchKernelExC failed with: " << cudaGetErrorString(err) << " (" - << err << ")" << std::endl; - std::cerr.flush(); - } else { - std::cerr << "[JIT] Kernel launch succeeded" << std::endl; - std::cerr.flush(); - } - RAFT_CUDA_TRY(err); - - // Check for immediate errors after launch (catches parameter issues early) - cudaError_t peek_err = cudaPeekAtLastError(); - if (peek_err != cudaSuccess) { - std::cerr << "[JIT] WARNING: Error detected immediately after kernel launch: " - << cudaGetErrorString(peek_err) << " (" << peek_err << ")" << std::endl; - std::cerr.flush(); - } + RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); } void AlgorithmLauncher::call_cooperative( diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index e83b466f24..0ae28d3ced 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -25,29 +25,14 @@ void AlgorithmPlanner::add_entrypoint() { - RAFT_LOG_INFO("[JIT FRAGMENT] Looking up entrypoint fragment: %s", this->entrypoint.c_str()); auto entrypoint_fragment = fragment_database().get_fragment(this->entrypoint); - if (entrypoint_fragment == nullptr) { - RAFT_FAIL("Entrypoint fragment is NULL for: %s", this->entrypoint.c_str()); - } - RAFT_LOG_INFO("[JIT FRAGMENT] Entrypoint fragment found: %s (ptr: %p)", - this->entrypoint.c_str(), - entrypoint_fragment); this->fragments.push_back(entrypoint_fragment); } void AlgorithmPlanner::add_device_functions() { for (const auto& device_function_key : this->device_functions) { - RAFT_LOG_INFO("[JIT FRAGMENT] Looking up device function fragment: %s", - device_function_key.c_str()); auto device_function_fragment = fragment_database().get_fragment(device_function_key); - if (device_function_fragment == nullptr) { - RAFT_FAIL("Device function fragment is NULL for: %s", device_function_key.c_str()); - } - RAFT_LOG_INFO("[JIT FRAGMENT] Device 
function fragment found: %s (ptr: %p)", - device_function_key.c_str(), - device_function_fragment); this->fragments.push_back(device_function_fragment); } } @@ -69,24 +54,17 @@ std::shared_ptr AlgorithmPlanner::get_launcher() static std::mutex cache_mutex; std::lock_guard lock(cache_mutex); if (launchers.count(launch_key) == 0) { - RAFT_LOG_INFO( - "[JIT CACHE] Cache MISS - Building new launcher for key: %s (entrypoint: %s, " - "device_functions: %s)", - launch_key.c_str(), - this->entrypoint.c_str(), - this->get_device_functions_key().c_str()); add_entrypoint(); add_device_functions(); + RAFT_LOG_INFO("A first-time JIT compilation has been triggered for your algorithm"); + std::string log_message = + "JIT compiling launcher for entrypoint: " + this->entrypoint + " and device functions: "; + for (const auto& device_function : this->device_functions) { + log_message += device_function + ","; + } + log_message.pop_back(); + RAFT_LOG_DEBUG("%s", log_message.c_str()); launchers[launch_key] = this->build(); - RAFT_LOG_INFO("[JIT CACHE] Launcher built and cached (kernel handle: %p)", - launchers[launch_key]->get_kernel()); - } else { - RAFT_LOG_INFO( - "[JIT CACHE] Cache HIT - Reusing cached launcher for key: %s (entrypoint: %s, kernel handle: " - "%p)", - launch_key.c_str(), - this->entrypoint.c_str(), - launchers[launch_key]->get_kernel()); } return launchers[launch_key]; } @@ -102,7 +80,6 @@ std::shared_ptr AlgorithmPlanner::build() std::string archs = "-arch=sm_" + std::to_string((major * 10 + minor)); - // Load the generated LTO IR and link them together nvJitLinkHandle handle; const char* lopts[] = { "-lto", "-split-compile=0", "-split-compile-extended=0", "-maxrregcount=64", archs.c_str()}; @@ -127,7 +104,6 @@ std::shared_ptr AlgorithmPlanner::build() result = nvJitLinkDestroy(&handle); RAFT_EXPECTS(result == NVJITLINK_SUCCESS, "nvJitLinkDestroy failed"); - // cubin is linked, so now load it cudaLibrary_t library; RAFT_CUDA_TRY( cudaLibraryLoadData(&library, 
cubin.get(), nullptr, nullptr, 0, nullptr, nullptr, 0)); @@ -135,17 +111,14 @@ std::shared_ptr AlgorithmPlanner::build() unsigned int kernel_count = 0; RAFT_CUDA_TRY(cudaLibraryGetKernelCount(&kernel_count, library)); - // NOTE: cudaKernel_t does not need to be freed explicitly std::unique_ptr kernels{new cudaKernel_t[kernel_count]}; RAFT_CUDA_TRY(cudaLibraryEnumerateKernels(kernels.get(), kernel_count, library)); - // Filter out EmptyKernel by checking kernel names using cudaFuncGetName const char* empty_kernel_name = "_ZN3cub6detail11EmptyKernelIvEEvv"; std::vector valid_kernels; valid_kernels.reserve(kernel_count); for (unsigned int i = 0; i < kernel_count; ++i) { - // cudaFuncGetName can be used with cudaKernel_t by casting to void* const void* func_ptr = reinterpret_cast(kernels[i]); const char* func_name = nullptr; RAFT_CUDA_TRY(cudaFuncGetName(&func_name, func_ptr)); @@ -153,14 +126,12 @@ std::shared_ptr AlgorithmPlanner::build() bool is_empty_kernel = false; if (func_name != nullptr) { std::string kernel_name(func_name); - // Check if this is EmptyKernel if (kernel_name.find(empty_kernel_name) != std::string::npos || kernel_name == empty_kernel_name) { is_empty_kernel = true; } } - // Only keep the kernel if it's not EmptyKernel if (!is_empty_kernel) { valid_kernels.push_back(kernels[i]); } } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in index f102f5386b..57f5c3938b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in @@ -20,7 +20,7 @@ template __global__ void apply_filter_kernel_jit<@index_type@, @distance_type@, #else #include -#include +#include #include "apply_filter_kernel_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in index 1f0b47fc7b..60ecb02203 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in @@ -11,9 +11,6 @@ namespace cuvs::neighbors::cagra::detail { -// Instantiate the cosine normalization function -// This fragment provides apply_normalization_standard that normalizes by dataset norm -// QueryT is needed to match the descriptor template signature using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( @distance_type@, const args_t, @index_type@); @@ -23,16 +20,13 @@ template __device__ @distance_type@ apply_normalization_standard<@team_size@, @d #else #include -#include +#include #include "apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { - // This fragment provides apply_normalization_standard (cosine normalization version) - // The planner links the appropriate fragment (noop or cosine) based on metric - // QueryT is always float for normalization (only used for CosineExpanded which uses float queries) using QueryTag = cuvs::neighbors::cagra::detail::tag_f; registerAlgorithm::args_t; template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( 
@distance_type@, const args_t, @index_type@); @@ -23,16 +20,13 @@ template __device__ @distance_type@ apply_normalization_standard<@team_size@, @d #else #include -#include +#include #include "apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { - // This fragment provides apply_normalization_standard (no-op version) - // The planner links the appropriate fragment (noop or cosine) based on metric - // QueryT is always float for normalization (only used for CosineExpanded which uses float queries) using QueryTag = cuvs::neighbors::cagra::detail::tag_f; registerAlgorithm::args_t; template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( const args_t, @index_type@); @@ -23,18 +20,18 @@ template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block #else #include -#include +#include #include "compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { - // This fragment provides compute_distance (standard version) - // The planner links the appropriate fragment (standard or VPQ) based on descriptor type + using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_type_abbrev@; registerAlgorithm( + tag_dist_@dist_abbrev@, + QueryTag>( "compute_distance_standard_t@team_size@_dim@dataset_block_dim@", 
embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, sizeof(embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index 43fe29de11..0f36470ad2 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -24,7 +24,7 @@ template __global__ void compute_distance_to_child_nodes_kernel_jit<@team_size@, #else #include -#include +#include #include "compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in index 595127c538..e04e11369b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in @@ -11,9 +11,6 @@ namespace cuvs::neighbors::cagra::detail { -// Instantiate the unified compute_distance function for VPQ descriptor -// PQ_BITS>0, PQ_LEN>0, CodebookT=half for VPQ descriptors -// QueryT is always half for VPQ using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( const args_t, 
@index_type@); @@ -23,19 +20,19 @@ template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block #else #include -#include +#include #include "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { - // This fragment provides compute_distance (VPQ version) - // The planner links the appropriate fragment (standard or VPQ) based on descriptor type + using QueryTag = cuvs::neighbors::cagra::detail::tag_h; registerAlgorithm( + tag_codebook_half, + QueryTag>( "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, sizeof(embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in index 96d09d9ba1..406a05df2c 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in @@ -11,10 +11,6 @@ namespace cuvs::neighbors::cagra::detail { -// Instantiate the dist_op function for the specific metric -// Each fragment provides dist_op for a specific metric - planner links the appropriate one -// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) -// DistanceT is always float template __device__ @distance_type@ dist_op<@query_type@, @distance_type@>(@query_type@, @query_type@); } // namespace cuvs::neighbors::cagra::detail @@ -22,15 +18,13 @@ template __device__ @distance_type@ dist_op<@query_type@, 
@distance_type@>(@quer #else #include -#include +#include #include "dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@() { - // QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) - // DistanceT is always float registerAlgorithm( "dist_op_@metric_tag@", diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in index 5cb740ac83..eb1c8e1d29 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -24,7 +24,7 @@ template __global__ void random_pickup_kernel_jit<@team_size@, @dataset_block_di #else #include -#include +#include #include "random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index d1042e0427..648d10e977 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -27,7 +27,7 @@ template __global__ void search_kernel_jit<@team_size@, @dataset_block_dim@, @pq #else #include -#include +#include #include "search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index 79b17b41f6..8966360c14 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -6,7 +6,7 @@ #pragma once // Include tags header before namespace (it defines a namespace) -#include +#include #include #include @@ -54,19 +54,18 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { std::string key = "setup_workspace_"; if (is_vpq) { key += "vpq_"; - // Note: Metric is no longer in the key - VPQ only supports L2Expanded using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - auto params = make_fragment_key(); + using QueryTag = cuvs::neighbors::cagra::detail::tag_h; + auto params = make_fragment_key(); key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; } else { - // Standard dataset - Metric is no longer in the key, linked via dist_op and normalization - // fragments - auto params = make_fragment_key(); + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; key += "standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); + auto params = make_fragment_key(); key += "_" + params; } this->device_functions.push_back(key); @@ -80,28 +79,39 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { uint32_t pq_len = 0) { if (is_vpq) { - // VPQ: Metric is no longer in the key - VPQ only supports L2Expanded std::string key = "compute_distance_vpq_"; using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - auto params = make_fragment_key(); + using QueryTag = cuvs::neighbors::cagra::detail::tag_h; + auto params = make_fragment_key(); key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + 
std::to_string(pq_len) + "subd"; key += "_" + params; this->device_functions.push_back(key); } else { - // Standard: compute_distance_standard no longer has metric in the name - // Metric is handled via dist_op fragments std::string key = "compute_distance_standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); - auto params = make_fragment_key(); - key += "_" + params; + if (metric == cuvs::distance::DistanceType::BitwiseHamming) { + using tag_uc = cuvs::neighbors::cagra::detail::tag_uc; + if constexpr (std::is_same_v) { + auto params = make_fragment_key(); + key += "_" + params; + } else { + auto params = make_fragment_key(); + key += "_" + params; + } + } else { + auto params = make_fragment_key(); + key += "_" + params; + } this->device_functions.push_back(key); - - // Add dist_op fragment for the metric add_dist_op_device_function(metric); - - // Add normalization fragment (cosine or noop) add_normalization_device_function(metric, team_size, dataset_block_dim); } } @@ -119,8 +129,6 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; default: metric_tag = "unknown"; break; } - // QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) - // DistanceT is always float std::string params; if (metric == cuvs::distance::DistanceType::BitwiseHamming) { params = make_fragment_key(); @@ -135,18 +143,13 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { uint32_t team_size, uint32_t dataset_block_dim) { - // Both cosine and noop fragments provide the same function name "apply_normalization_standard" - // but register with different fragment names. The planner links the appropriate one based on - // metric. 
std::string normalization_type; if (metric == cuvs::distance::DistanceType::CosineExpanded) { normalization_type = "cosine"; } else { normalization_type = "noop"; } - // QueryT is always float for normalization (only used for CosineExpanded which uses float - // queries) - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; // Always float for normalization + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; auto params = make_fragment_key(); std::string key = "apply_normalization_standard_" + normalization_type; key += "_t" + std::to_string(team_size); @@ -171,12 +174,7 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { uint32_t pq_len) { std::string name = "search_multi_cta_kernel"; - if (is_vpq) { - name += "_vpq"; - // Note: Metric is no longer in VPQ kernel names - VPQ only supports L2Expanded - } - // Note: Metric is no longer in kernel names - it's linked via dist_op and normalization - // fragments + if (is_vpq) { name += "_vpq"; } name += "_t" + std::to_string(team_size); name += "_dim" + std::to_string(dataset_block_dim); if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index d54cbca3b9..550101dbab 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -6,7 +6,7 @@ #pragma once // Include tags header before namespace (it defines a namespace) -#include +#include #include #include @@ -57,19 +57,18 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { std::string key = "setup_workspace_"; if (is_vpq) { key += "vpq_"; - // Note: Metric is no longer in the key - VPQ only supports L2Expanded using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - auto params = make_fragment_key(); 
+ using QueryTag = cuvs::neighbors::cagra::detail::tag_h; + auto params = make_fragment_key(); key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; } else { - // Standard dataset - Metric is no longer in the key, linked via dist_op and normalization - // fragments - auto params = make_fragment_key(); + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; key += "standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); + auto params = make_fragment_key(); key += "_" + params; } this->device_functions.push_back(key); @@ -83,28 +82,39 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { uint32_t pq_len = 0) { if (is_vpq) { - // VPQ: Metric is no longer in the key - VPQ only supports L2Expanded std::string key = "compute_distance_vpq_"; using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - auto params = make_fragment_key(); + using QueryTag = cuvs::neighbors::cagra::detail::tag_h; + auto params = make_fragment_key(); key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; this->device_functions.push_back(key); } else { - // Standard: compute_distance_standard no longer has metric in the name - // Metric is handled via dist_op fragments std::string key = "compute_distance_standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); - auto params = make_fragment_key(); - key += "_" + params; + if (metric == cuvs::distance::DistanceType::BitwiseHamming) { + using tag_uc = cuvs::neighbors::cagra::detail::tag_uc; + if constexpr (std::is_same_v) { + auto params = make_fragment_key(); + key += "_" + params; + } else { + auto params = make_fragment_key(); + key += "_" + params; + } + } else { + auto params = 
make_fragment_key(); + key += "_" + params; + } this->device_functions.push_back(key); - - // Add dist_op fragment for the metric add_dist_op_device_function(metric); - - // Add normalization fragment (cosine or noop) add_normalization_device_function(metric, team_size, dataset_block_dim); } } @@ -122,8 +132,6 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; default: metric_tag = "unknown"; break; } - // QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) - // DistanceT is always float std::string params; if (metric == cuvs::distance::DistanceType::BitwiseHamming) { params = make_fragment_key(); @@ -138,18 +146,13 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { uint32_t team_size, uint32_t dataset_block_dim) { - // Both cosine and noop fragments provide the same function name "apply_normalization_standard" - // but register with different fragment names. The planner links the appropriate one based on - // metric. 
std::string normalization_type; if (metric == cuvs::distance::DistanceType::CosineExpanded) { normalization_type = "cosine"; } else { normalization_type = "noop"; } - // QueryT is always float for normalization (only used for CosineExpanded which uses float - // queries) - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; // Always float for normalization + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; auto params = make_fragment_key(); std::string key = "apply_normalization_standard_" + normalization_type; key += "_t" + std::to_string(team_size); @@ -176,16 +179,10 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { uint32_t pq_bits, uint32_t pq_len) { - // Special case: apply_filter_kernel doesn't use dataset_descriptor, so no suffixes needed if (kernel_name == "apply_filter_kernel") { return kernel_name; } std::string name = kernel_name; - if (is_vpq) { - name += "_vpq"; - // Note: Metric is no longer in VPQ kernel names - VPQ only supports L2Expanded - } - // Note: Metric is no longer in kernel names - it's linked via dist_op and normalization - // fragments + if (is_vpq) { name += "_vpq"; } name += "_t" + std::to_string(team_size); name += "_dim" + std::to_string(dataset_block_dim); if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in index 7bf948edf7..e7d314e505 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -24,7 +24,7 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_b #else #include -#include +#include #include 
"search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in index 5bec86da8d..1cbaddd41d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in @@ -24,7 +24,7 @@ template __global__ void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_so #else #include -#include +#include #include "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index a6e75eb237..8a75b4ed48 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -5,18 +5,14 @@ #pragma once -// Include tags header before namespace (it defines a namespace) -#include +#include #include #include #include #include -#include -#include #include -// Use nested namespace syntax to allow inclusion from within parent namespace namespace cuvs { namespace neighbors { namespace cagra { @@ -63,21 +59,18 @@ struct CagraSearchPlanner : AlgorithmPlanner { std::string key = "setup_workspace_"; if (is_vpq) { key += "vpq_"; - // For VPQ, include codebook type tag in template parameters - // Note: Metric is no longer in the key - 
VPQ only supports L2Expanded using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - // Use template tags only for types, strings for integers/enums - auto params = make_fragment_key(); + using QueryTag = cuvs::neighbors::cagra::detail::tag_h; + auto params = make_fragment_key(); key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; } else { - // Standard dataset - Metric is no longer in the key, linked via dist_op and normalization - // fragments Use template tags only for types, strings for integers/enums - auto params = make_fragment_key(); + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; key += "standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); + auto params = make_fragment_key(); key += "_" + params; } this->device_functions.push_back(key); @@ -91,28 +84,39 @@ struct CagraSearchPlanner : AlgorithmPlanner { uint32_t pq_len = 0) { if (is_vpq) { - // VPQ: Metric is no longer in the key - VPQ only supports L2Expanded std::string key = "compute_distance_vpq_"; using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - auto params = make_fragment_key(); + using QueryTag = cuvs::neighbors::cagra::detail::tag_h; + auto params = make_fragment_key(); key += "t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; key += "_" + params; this->device_functions.push_back(key); } else { - // Standard: compute_distance_standard no longer has metric in the name - // Metric is handled via dist_op fragments std::string key = "compute_distance_standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); - auto params = make_fragment_key(); - key += "_" + params; + if (metric == cuvs::distance::DistanceType::BitwiseHamming) { + 
using tag_uc = cuvs::neighbors::cagra::detail::tag_uc; + if constexpr (std::is_same_v) { + auto params = make_fragment_key(); + key += "_" + params; + } else { + auto params = make_fragment_key(); + key += "_" + params; + } + } else { + auto params = make_fragment_key(); + key += "_" + params; + } this->device_functions.push_back(key); - - // Add dist_op fragment for the metric add_dist_op_device_function(metric); - - // Add normalization fragment (cosine or noop) add_normalization_device_function(metric, team_size, dataset_block_dim); } } @@ -130,8 +134,6 @@ struct CagraSearchPlanner : AlgorithmPlanner { case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; default: metric_tag = "unknown"; break; } - // QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) - // DistanceT is always float std::string params; if (metric == cuvs::distance::DistanceType::BitwiseHamming) { params = make_fragment_key(); @@ -146,19 +148,13 @@ struct CagraSearchPlanner : AlgorithmPlanner { uint32_t team_size, uint32_t dataset_block_dim) { - // Both cosine and noop fragments provide the same function name "apply_normalization_standard" - // but register with different fragment names. The planner links the appropriate one based on - // metric. 
- // QueryT is always float for normalization (only used for CosineExpanded which uses float - // queries) std::string normalization_type; if (metric == cuvs::distance::DistanceType::CosineExpanded) { normalization_type = "cosine"; } else { normalization_type = "noop"; } - // For lookup, we need to manually append fragment key parameters - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; // Always float for normalization + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; auto params = make_fragment_key(); std::string key = "apply_normalization_standard_" + normalization_type; key += "_t" + std::to_string(team_size); @@ -184,11 +180,8 @@ struct CagraSearchPlanner : AlgorithmPlanner { bool persistent) { std::string name = (persistent ? "search_single_cta_kernel_p_" : "search_single_cta_kernel_"); - // Note: "vpq" is no longer in the name - PQ parameters distinguish VPQ from standard name += bool_to_string(topk_by_bitonic_sort) + "_"; name += bool_to_string(bitonic_sort_and_merge_multi_warps) + "_"; - // Note: Metric is no longer in kernel names - it's linked via dist_op and normalization - // fragments name += "t" + std::to_string(team_size); name += "_dim" + std::to_string(dataset_block_dim); if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in index 1d23ccc218..a7b75ec067 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in @@ -11,9 +11,6 @@ namespace cuvs::neighbors::cagra::detail { -// Instantiate the unified setup_workspace function for standard descriptor -// PQ_BITS=0, PQ_LEN=0, CodebookT=void for standard descriptors -// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) template __device__ 
cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); @@ -22,18 +19,18 @@ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@d #else #include -#include +#include #include "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { - // This fragment provides setup_workspace (standard version) - // The planner links the appropriate fragment (standard or VPQ) based on descriptor type + using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_type_abbrev@; registerAlgorithm( + tag_dist_@dist_abbrev@, + QueryTag>( "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@", embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, sizeof(embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in index d608c10506..ac2b791f4a 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in @@ -11,9 +11,6 @@ namespace cuvs::neighbors::cagra::detail { -// Instantiate the unified setup_workspace function for VPQ descriptor -// PQ_BITS>0, PQ_LEN>0, CodebookT=half 
for VPQ descriptors -// QueryT is always half for VPQ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); @@ -22,19 +19,19 @@ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@d #else #include -#include +#include #include "setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; __attribute__((__constructor__)) static void register_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { - // This fragment provides setup_workspace (VPQ version) - // The planner links the appropriate fragment (standard or VPQ) based on descriptor type + using QueryTag = cuvs::neighbors::cagra::detail::tag_h; registerAlgorithm( + tag_codebook_half, + QueryTag>( "setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", embedded_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, sizeof(embedded_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 123308c581..0329e68d56 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -10,7 +10,7 @@ #endif // Include tags header before any other includes 
that might open namespaces -#include +#include #include "compute_distance.hpp" // For dataset_descriptor_host #include "jit_lto_kernels/search_multi_cta_planner.hpp" diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index 0fa02b47fa..12f711b31d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -8,7 +8,7 @@ // functions #ifdef CUVS_ENABLE_JIT_LTO #include "search_multi_kernel_launcher_jit.cuh" -#include +#include #endif #include "set_value_batch.cuh" diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index ebc7d5dc1b..3b3bacb0ae 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -13,7 +13,7 @@ #include // Include tags header before any other includes that might open namespaces -#include +#include #include "compute_distance.hpp" // For dataset_descriptor_host #include "jit_lto_kernels/search_single_cta_planner.hpp" diff --git a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp index 3f7659da19..be2b6481a7 100644 --- a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp +++ b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp @@ -10,7 +10,7 @@ #endif // Include tags header before any other includes that might open namespaces -#include +#include #include "../../sample_filter.cuh" // For none_sample_filter, bitset_filter diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index 3d68515628..240bd2c98a 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ 
-10,7 +10,7 @@ #include "jit_lto_kernels/interleaved_scan_planner.hpp" #include #include -#include +#include #include #include @@ -72,12 +72,9 @@ constexpr auto get_filter_type_tag() { using namespace cuvs::neighbors::filtering; - // Determine the filter implementation tag - if constexpr (std::is_same_v) { - return tag_filter{}; - } + if constexpr (std::is_same_v) { return tag_filter_none{}; } if constexpr (std::is_same_v>) { - return tag_filter{}; + return tag_filter_bitset{}; } } @@ -96,12 +93,8 @@ constexpr auto get_metric_name() template constexpr auto get_filter_name() { - if constexpr (std::is_same_v>) { - return "filter_none_l"; - } - if constexpr (std::is_same_v>) { - return "filter_bitset_l"; - } + if constexpr (std::is_same_v) { return "filter_none_l"; } + if constexpr (std::is_same_v) { return "filter_bitset_l"; } } template diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in index 4ca8e88fd6..9c3674f0e4 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in @@ -23,7 +23,7 @@ template __global__ void interleaved_scan_kernel<@capacity@, @veclen@, @ascendin #else #include -#include +#include #include "interleaved_scan_kernel_@capacity@_@veclen@_@ascending@_@compute_norm@_@type_abbrev@_@acc_abbrev@_@idx_abbrev@.h" using namespace cuvs::neighbors::ivf_flat::detail; diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in index 0f6bb904d1..0a78a405a2 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric.cu.in @@ -19,7 +19,7 @@ template __device__ void compute_dist<@veclen@, @data_type@, @acc_type@>(@acc_ty #else #include -#include +#include #include 
"metric_@metric_name@_@veclen@_@type_abbrev@_@acc_abbrev@.h" using namespace cuvs::neighbors::ivf_flat::detail; From 3256a8efbcaeed9003c4b548ed5df4d7a6a217ee Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 20 Feb 2026 20:19:10 +0000 Subject: [PATCH 121/158] attempt to fix devcontainer error --- cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index aafd1c868e..a17226cd01 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -16,7 +16,7 @@ nvrtcResult result = _call; \ std::string error_string = \ std::string("nvrtc error: ") + std::string(nvrtcGetErrorString(result)); \ - RAFT_EXPECTS(result == NVRTC_SUCCESS, error_string.c_str()); \ + RAFT_EXPECTS(result == NVRTC_SUCCESS, "%s", error_string.c_str()); \ } NVRTCLTOFragmentCompiler::NVRTCLTOFragmentCompiler() From 43501b7fa0cdfe8f70d5582f3654875021847b2d Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 20 Feb 2026 20:20:51 +0000 Subject: [PATCH 122/158] address review comments --- cpp/include/cuvs/neighbors/ivf_flat.hpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index ccb92eb594..2b11cd390d 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -3147,12 +3147,8 @@ __device__ __forceinline__ AccT abs_diff(point x, point { if constexpr (std::is_same_v && V > 1) { auto diff = __vabsdiffu4(x.raw(), y.raw()); - return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + - ((diff >> 24) & 0xFF); } else if constexpr (std::is_same_v && V > 1) { auto diff = __vabsdiffs4(x.raw(), y.raw()); - return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + - ((diff >> 24) & 0xFF); } else { auto a = x.raw(); auto 
b = y.raw(); @@ -3293,12 +3289,8 @@ __device__ __forceinline__ AccT abs_diff(point x, point { if constexpr (std::is_same_v && V > 1) { auto diff = __vabsdiffu4(x.raw(), y.raw()); - return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + - ((diff >> 24) & 0xFF); } else if constexpr (std::is_same_v && V > 1) { auto diff = __vabsdiffs4(x.raw(), y.raw()); - return ((diff >> 0) & 0xFF) + ((diff >> 8) & 0xFF) + ((diff >> 16) & 0xFF) + - ((diff >> 24) & 0xFF); } else { auto a = x.raw(); auto b = y.raw(); From b85f16bc0fef24a69c4593292e880e676a1cfb4c Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 24 Feb 2026 14:22:15 -0500 Subject: [PATCH 123/158] Add matrix JSON files --- .../apply_filter_kernel_matrix.json | 20 ++ ..._normalization_standard_cosine_matrix.json | 42 ++++ ...ly_normalization_standard_noop_matrix.json | 42 ++++ .../compute_distance_standard_matrix.json | 70 ++++++ ...distance_to_child_nodes_kernel_matrix.json | 166 ++++++++++++++ .../compute_distance_vpq_matrix.json | 58 +++++ .../cagra/jit_lto_kernels/dist_op_matrix.json | 25 +++ .../cagra/jit_lto_kernels/filter_matrix.json | 21 ++ .../random_pickup_kernel_matrix.json | 154 +++++++++++++ .../search_multi_cta_kernel_matrix.json | 166 ++++++++++++++ .../search_single_cta_kernel_matrix.json | 206 ++++++++++++++++++ .../search_single_cta_kernel_p_matrix.json | 206 ++++++++++++++++++ .../setup_workspace_standard_matrix.json | 70 ++++++ .../setup_workspace_vpq_matrix.json | 58 +++++ 14 files changed, 1304 insertions(+) create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_matrix.json create mode 100644 
cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_matrix.json create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_matrix.json diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel_matrix.json new file mode 100644 index 0000000000..4f14f7d8c0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel_matrix.json @@ -0,0 +1,20 @@ +{ + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ] +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_matrix.json new file mode 100644 index 0000000000..f6703da252 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_matrix.json @@ -0,0 +1,42 @@ +{ + "_data": [ + { + 
"data_type": "float", + "data_abbrev": "f" + }, + { + "data_type": "__half", + "data_abbrev": "h" + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc" + }, + { + "data_type": "int8_t", + "data_abbrev": "sc" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ] +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_matrix.json new file mode 100644 index 0000000000..f6703da252 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_matrix.json @@ -0,0 +1,42 @@ +{ + "_data": [ + { + "data_type": "float", + "data_abbrev": "f" + }, + { + "data_type": "__half", + "data_abbrev": "h" + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc" + }, + { + "data_type": "int8_t", + "data_abbrev": "sc" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ] +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_matrix.json new file mode 100644 index 0000000000..3c80cb18f3 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_matrix.json @@ -0,0 +1,70 @@ +{ + "_data": [ + { + "data_type": "float", + "data_abbrev": "f", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "__half", + "data_abbrev": "h", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, 
+ { + "data_type": "uint8_t", + "data_abbrev": "uc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + }, + { + "query_type": "uint8_t", + "query_abbrev": "uc" + } + ] + }, + { + "data_type": "int8_t", + "data_abbrev": "sc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ] +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json new file mode 100644 index 0000000000..81c8c1c48f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json @@ -0,0 +1,166 @@ +[ + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "__half", + "data_abbrev": "h", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + }, + { + "query_type": "uint8_t", + "query_abbrev": "uc" + } + ] + }, + { + "data_type": "int8_t", + "data_abbrev": "sc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + } + ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "0", + "pq_len": "0", + 
"pq_prefix": "", + "pq_suffix": "" + } + ], + "_codebook": [ + { + "codebook_type": "void", + "codebook_tag": "" + } + ] + }, + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f" + }, + { + "data_type": "__half", + "data_abbrev": "h" + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc" + }, + { + "data_type": "int8_t", + "data_abbrev": "sc" + } + ], + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "8", + "pq_len": "2", + "pq_prefix": "_vpq", + "pq_suffix": "_8pq_2subd" + }, + { + "pq_bits": "8", + "pq_len": "4", + "pq_prefix": "_vpq", + "pq_suffix": "_8pq_4subd" + } + ], + "_codebook": [ + { + "codebook_type": "half", + "codebook_tag": ", tag_codebook_half" + } + ] + } +] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_matrix.json new file mode 100644 index 0000000000..bb4d6b8471 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_matrix.json @@ -0,0 +1,58 @@ +{ + "_data": [ + { + "data_type": "float", + "data_abbrev": "f" + }, + { + "data_type": "__half", + "data_abbrev": "h" + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc" + }, + { + "data_type": "int8_t", + "data_abbrev": "sc" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "pq_len": [ + "2", + "4" + ], + 
"pq_bits": [ + "8" + ], + "codebook_type": [ + "half" + ], + "_query": [ + { + "query_type": "half", + "query_abbrev": "h" + } + ] +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json new file mode 100644 index 0000000000..174a917811 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json @@ -0,0 +1,25 @@ +{ + "_metric": [ + { + "metric_tag": "l2", + "query_type": "float", + "query_type_abbrev": "f" + }, + { + "metric_tag": "inner_product", + "query_type": "float", + "query_type_abbrev": "f" + }, + { + "metric_tag": "hamming", + "query_type": "uint8_t", + "query_type_abbrev": "uc" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ] +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_matrix.json new file mode 100644 index 0000000000..c253774432 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_matrix.json @@ -0,0 +1,21 @@ +{ + "_filter": [ + { + "filter_name": "filter_none", + "header_file": "neighbors/detail/jit_lto_kernels/filter_none.cuh" + }, + { + "filter_name": "filter_bitset", + "header_file": "neighbors/detail/jit_lto_kernels/filter_bitset.cuh" + } + ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "namespace": [ + "cuvs::neighbors::detail" + ] +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json new file mode 100644 index 0000000000..757f519685 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json @@ -0,0 +1,154 @@ +[ + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + 
{ + "data_type": "__half", + "data_abbrev": "h", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + }, + { + "query_type": "uint8_t", + "query_abbrev": "uc" + } + ] + }, + { + "data_type": "int8_t", + "data_abbrev": "sc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "0", + "pq_len": "0", + "pq_prefix": "", + "pq_suffix": "" + } + ], + "_codebook": [ + { + "codebook_type": "void", + "codebook_tag": "" + } + ] + }, + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f" + }, + { + "data_type": "__half", + "data_abbrev": "h" + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc" + }, + { + "data_type": "int8_t", + "data_abbrev": "sc" + } + ], + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "8", + "pq_len": "2", + "pq_prefix": "_vpq", + "pq_suffix": "_8pq_2subd" + }, + { + "pq_bits": "8", + "pq_len": "4", + "pq_prefix": "_vpq", + "pq_suffix": "_8pq_4subd" + } + ], + "_codebook": [ + { + "codebook_type": "half", + "codebook_tag": ", tag_codebook_half" + } + ] + } +] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json new file mode 100644 index 
0000000000..81c8c1c48f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json @@ -0,0 +1,166 @@ +[ + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "__half", + "data_abbrev": "h", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + }, + { + "query_type": "uint8_t", + "query_abbrev": "uc" + } + ] + }, + { + "data_type": "int8_t", + "data_abbrev": "sc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + } + ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "0", + "pq_len": "0", + "pq_prefix": "", + "pq_suffix": "" + } + ], + "_codebook": [ + { + "codebook_type": "void", + "codebook_tag": "" + } + ] + }, + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f" + }, + { + "data_type": "__half", + "data_abbrev": "h" + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc" + }, + { + "data_type": "int8_t", + "data_abbrev": "sc" + } + ], + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "8", + 
"pq_len": "2", + "pq_prefix": "_vpq", + "pq_suffix": "_8pq_2subd" + }, + { + "pq_bits": "8", + "pq_len": "4", + "pq_prefix": "_vpq", + "pq_suffix": "_8pq_4subd" + } + ], + "_codebook": [ + { + "codebook_type": "half", + "codebook_tag": ", tag_codebook_half" + } + ] + } +] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json new file mode 100644 index 0000000000..889fae2242 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json @@ -0,0 +1,206 @@ +[ + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "__half", + "data_abbrev": "h", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + }, + { + "query_type": "uint8_t", + "query_abbrev": "uc" + } + ] + }, + { + "data_type": "int8_t", + "data_abbrev": "sc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + } + ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "_topk_by_bitonic": [ + { + "topk_by_bitonic_sort": "true", + "topk_by_bitonic_sort_str": "true" + }, + { + "topk_by_bitonic_sort": "false", + "topk_by_bitonic_sort_str": "false" + } + ], + "_bitonic_sort_and_merge_multi_warps": [ + { + "bitonic_sort_and_merge_multi_warps": "true", + "bitonic_sort_and_merge_multi_warps_str": "true" + }, + { + "bitonic_sort_and_merge_multi_warps": "false", + "bitonic_sort_and_merge_multi_warps_str": "false" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + 
"dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "0", + "pq_len": "0", + "pq_prefix": "", + "pq_suffix": "" + } + ], + "_codebook": [ + { + "codebook_type": "void", + "codebook_tag": "" + } + ] + }, + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f" + }, + { + "data_type": "__half", + "data_abbrev": "h" + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc" + }, + { + "data_type": "int8_t", + "data_abbrev": "sc" + } + ], + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "_topk_by_bitonic": [ + { + "topk_by_bitonic_sort": "true", + "topk_by_bitonic_sort_str": "true" + }, + { + "topk_by_bitonic_sort": "false", + "topk_by_bitonic_sort_str": "false" + } + ], + "_bitonic_sort_and_merge_multi_warps": [ + { + "bitonic_sort_and_merge_multi_warps": "true", + "bitonic_sort_and_merge_multi_warps_str": "true" + }, + { + "bitonic_sort_and_merge_multi_warps": "false", + "bitonic_sort_and_merge_multi_warps_str": "false" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "8", + "pq_len": "2", + "pq_prefix": "", + "pq_suffix": "_8pq_2subd" + }, + { + "pq_bits": "8", + "pq_len": "4", + "pq_prefix": "", + "pq_suffix": "_8pq_4subd" + } + ], + "_codebook": [ + { + "codebook_type": "half", + "codebook_tag": ", tag_codebook_half" + } + ] + } +] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json new file mode 100644 index 0000000000..889fae2242 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json @@ -0,0 +1,206 @@ +[ + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "__half", + "data_abbrev": "h", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + }, + { + "query_type": "uint8_t", + "query_abbrev": "uc" + } + ] + }, + { + "data_type": "int8_t", + "data_abbrev": "sc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + } + ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "_topk_by_bitonic": [ + { + "topk_by_bitonic_sort": "true", + "topk_by_bitonic_sort_str": "true" + }, + { + "topk_by_bitonic_sort": "false", + "topk_by_bitonic_sort_str": "false" + } + ], + "_bitonic_sort_and_merge_multi_warps": [ + { + "bitonic_sort_and_merge_multi_warps": "true", + "bitonic_sort_and_merge_multi_warps_str": "true" + }, + { + "bitonic_sort_and_merge_multi_warps": "false", + "bitonic_sort_and_merge_multi_warps_str": "false" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "0", + "pq_len": "0", + "pq_prefix": "", + "pq_suffix": "" + } + ], + "_codebook": [ + { + "codebook_type": "void", + "codebook_tag": "" + } + ] + }, + { + "_data": [ + { + "data_type": "float", + "data_abbrev": "f" + }, + { + "data_type": "__half", + "data_abbrev": "h" + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc" + }, + { + "data_type": "int8_t", + "data_abbrev": "sc" + } + ], + "_query": [ + { + "query_type": "float", + 
"query_abbrev": "f" + } + ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "_topk_by_bitonic": [ + { + "topk_by_bitonic_sort": "true", + "topk_by_bitonic_sort_str": "true" + }, + { + "topk_by_bitonic_sort": "false", + "topk_by_bitonic_sort_str": "false" + } + ], + "_bitonic_sort_and_merge_multi_warps": [ + { + "bitonic_sort_and_merge_multi_warps": "true", + "bitonic_sort_and_merge_multi_warps_str": "true" + }, + { + "bitonic_sort_and_merge_multi_warps": "false", + "bitonic_sort_and_merge_multi_warps_str": "false" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "_pq": [ + { + "pq_bits": "8", + "pq_len": "2", + "pq_prefix": "", + "pq_suffix": "_8pq_2subd" + }, + { + "pq_bits": "8", + "pq_len": "4", + "pq_prefix": "", + "pq_suffix": "_8pq_4subd" + } + ], + "_codebook": [ + { + "codebook_type": "half", + "codebook_tag": ", tag_codebook_half" + } + ] + } +] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_matrix.json new file mode 100644 index 0000000000..3c80cb18f3 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_matrix.json @@ -0,0 +1,70 @@ +{ + "_data": [ + { + "data_type": "float", + "data_abbrev": "f", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "__half", + "data_abbrev": "h", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + }, + { + "query_type": "uint8_t", + "query_abbrev": "uc" + } + ] + }, + { + "data_type": "int8_t", + 
"data_abbrev": "sc", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ] +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_matrix.json new file mode 100644 index 0000000000..bb4d6b8471 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_matrix.json @@ -0,0 +1,58 @@ +{ + "_data": [ + { + "data_type": "float", + "data_abbrev": "f" + }, + { + "data_type": "__half", + "data_abbrev": "h" + }, + { + "data_type": "uint8_t", + "data_abbrev": "uc" + }, + { + "data_type": "int8_t", + "data_abbrev": "sc" + } + ], + "_index": [ + { + "index_type": "uint32_t", + "index_abbrev": "ui" + } + ], + "_distance": [ + { + "distance_type": "float", + "distance_abbrev": "f" + } + ], + "team_size": [ + "8", + "16", + "32" + ], + "dataset_block_dim": [ + "128", + "256", + "512" + ], + "pq_len": [ + "2", + "4" + ], + "pq_bits": [ + "8" + ], + "codebook_type": [ + "half" + ], + "_query": [ + { + "query_type": "half", + "query_abbrev": "h" + } + ] +} From de0a2b55be4bc20fc7c0826abddbad815fdb93fd Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 24 Feb 2026 14:32:53 -0500 Subject: [PATCH 124/158] Fix --- cpp/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4b6c9dc5ab..cd56e9c4d2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -213,6 +213,13 @@ endif() # ################################################################################################## # * cuvs --------------------------------------------------------------------- if(NOT BUILD_CPU_ONLY) + 
set(JIT_LTO_TARGET_ARCHITECTURE "") + set(JIT_LTO_COMPILATION OFF) + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + set(JIT_LTO_TARGET_ARCHITECTURE "75-real") + set(JIT_LTO_COMPILATION ON) + endif() + add_library(cuvs_cpp_headers INTERFACE) add_library(cuvs::cuvs_cpp_headers ALIAS cuvs_cpp_headers) target_include_directories( @@ -349,13 +356,6 @@ if(NOT BUILD_CPU_ONLY) ) endif() - set(JIT_LTO_TARGET_ARCHITECTURE "") - set(JIT_LTO_COMPILATION OFF) - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) - set(JIT_LTO_TARGET_ARCHITECTURE "75-real") - set(JIT_LTO_COMPILATION ON) - endif() - if(JIT_LTO_COMPILATION) # Generate interleaved scan kernel files at build time include(cmake/modules/generate_jit_lto_kernels.cmake) From c7909c34769f5577b2c0628c6a661ea73e9371dd Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 24 Feb 2026 21:59:20 +0000 Subject: [PATCH 125/158] more refactors and fix stream serialization bug --- .../modules/generate_jit_lto_kernels.cmake | 113 +++++-- .../cuvs/detail/jit_lto/FragmentDatabase.hpp | 2 + cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 18 -- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 2 +- cpp/src/detail/jit_lto/FragmentDatabase.cu | 22 +- .../detail/cagra/compute_distance.hpp | 55 +++- .../jit_lto_kernels/apply_filter_kernel.cu.in | 1 - ...cosine.cu.in => apply_normalization.cu.in} | 12 +- .../apply_normalization_standard_noop.cu.in | 40 --- .../jit_lto_kernels/cagra_planner_base.hpp | 135 ++++++++ ..._standard.cu.in => compute_distance.cu.in} | 16 +- ...mpute_distance_to_child_nodes_kernel.cu.in | 7 +- .../compute_distance_vpq.cu.in | 41 --- .../extern_device_functions.cuh | 23 +- .../cagra/jit_lto_kernels/filter_data.h | 9 - .../random_pickup_kernel.cu.in | 7 +- .../search_multi_cta_kernel.cu.in | 11 +- .../search_multi_cta_kernel_jit.cuh | 2 +- .../search_multi_cta_planner.hpp | 156 +--------- .../search_multi_kernel_jit.cuh | 2 +- .../search_multi_kernel_planner.hpp | 159 +--------- 
.../search_single_cta_kernel.cu.in | 7 +- .../search_single_cta_kernel_jit.cuh | 2 +- .../search_single_cta_kernel_p.cu.in | 7 +- .../search_single_cta_planner.hpp | 193 ++---------- ...e_standard.cu.in => setup_workspace.cu.in} | 16 +- .../jit_lto_kernels/setup_workspace_vpq.cu.in | 40 --- .../search_multi_cta_kernel_launcher_jit.cuh | 157 +++++++--- .../search_multi_kernel_launcher_jit.cuh | 274 +++++++++++++---- .../search_single_cta_kernel_launcher_jit.cuh | 287 ++++++++++++++---- .../detail/cagra/shared_launcher_jit.hpp | 30 ++ cpp/src/neighbors/detail/cagra/utils.hpp | 8 +- .../ivf_flat/jit_lto_kernels/filter.cu.in | 32 -- 33 files changed, 986 insertions(+), 900 deletions(-) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{apply_normalization_standard_cosine.cu.in => apply_normalization.cu.in} (61%) delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{compute_distance_standard.cu.in => compute_distance.cu.in} (50%) delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{setup_workspace_standard.cu.in => setup_workspace.cu.in} (51%) delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index c27b78dc04..e3e488db4d 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -240,9 +240,9 @@ function(generate_jit_lto_kernels target) set(cagra_source_index_types "uint32_t") set(cagra_source_index_abbrevs "ui") - # 
Generate setup_workspace_standard fragments (one per team_size, dataset_block_dim, data_type, - # index_type, distance_type, query_type) QueryT can be float (for most metrics) or uint8_t (for - # BitwiseHamming when DataT=uint8_t) + # Generate setup_workspace fragments (one per team_size, dataset_block_dim, data_type, index_type, + # distance_type, query_type) QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming + # when DataT=uint8_t) foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) @@ -258,10 +258,20 @@ function(generate_jit_lto_kernels target) set(distance_type "${cagra_distance_type}") set(query_type "float") set(query_type_abbrev "f") + set(query_type_suffix "_f") + set(query_type_suffix_reg "_f") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_prefix "_standard") + set(pq_suffix "") + set(codebook_tag "") + set(codebook_tag_comma "") + set(impl_file "setup_workspace_standard_impl.cuh") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace.cu.in" "${filename}" @ONLY ) @@ -283,10 +293,20 @@ function(generate_jit_lto_kernels target) set(distance_type "${cagra_distance_type}") set(query_type "uint8_t") set(query_type_abbrev "uc") + set(query_type_suffix "_uc") + set(query_type_suffix_reg "_uc") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_prefix "_standard") + set(pq_suffix "") + set(codebook_tag "") + set(codebook_tag_comma "") + set(impl_file "setup_workspace_standard_impl.cuh") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace.cu.in" "${filename}" @ONLY ) @@ -302,9 +322,8 @@ function(generate_jit_lto_kernels target) endforeach() endforeach() - # Generate compute_distance_standard fragments (without metric - metric is handled via dist_op - # fragments) QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming when - # DataT=uint8_t) + # Generate compute_distance fragments (without metric - metric is handled via dist_op fragments) + # QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming when DataT=uint8_t) foreach(data_idx IN ITEMS 0 1 2 3) list(GET cagra_data_types ${data_idx} data_type) list(GET cagra_data_type_abbrevs ${data_idx} type_abbrev) @@ -320,10 +339,20 @@ function(generate_jit_lto_kernels target) set(distance_type "${cagra_distance_type}") set(query_type "float") set(query_type_abbrev "f") + set(query_type_suffix "_f") + set(query_type_suffix_reg "_f") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_prefix "_standard") + set(pq_suffix "") + set(codebook_tag "") + set(codebook_tag_comma "") + set(impl_file "compute_distance_standard_impl.cuh") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance.cu.in" "${filename}" @ONLY ) @@ -345,10 +374,20 @@ function(generate_jit_lto_kernels target) set(distance_type "${cagra_distance_type}") set(query_type "uint8_t") set(query_type_abbrev "uc") + set(query_type_suffix "_uc") + set(query_type_suffix_reg "_uc") + set(pq_bits "0") + set(pq_len "0") + set(codebook_type "void") + set(pq_prefix "_standard") + set(pq_suffix "") + set(codebook_tag "") + 
set(codebook_tag_comma "") + set(impl_file "compute_distance_standard_impl.cuh") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance.cu.in" "${filename}" @ONLY ) @@ -370,16 +409,12 @@ function(generate_jit_lto_kernels target) set(dist_op_tags "l2" "inner_product" "hamming") foreach(metric_tag IN LISTS dist_op_tags) if(metric_tag STREQUAL "hamming") - # BitwiseHamming uses QueryT=uint8_t set(query_type "uint8_t") set(query_type_abbrev "uc") else() - # L2 and InnerProduct use QueryT=float set(query_type "float") set(query_type_abbrev "f") endif() - # Generate dist_op fragment for this metric tag Note: dist_op uses QueryT and DistanceT, not - # DataT set(kernel_name "dist_op_${metric_tag}_${query_type_abbrev}_${cagra_distance_abbrev}") set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") set(metric_tag "${metric_tag}") @@ -417,8 +452,9 @@ function(generate_jit_lto_kernels target) set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") + set(normalization_suffix "_noop") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization.cu.in" "${filename}" @ONLY ) @@ -435,8 +471,9 @@ function(generate_jit_lto_kernels target) "apply_normalization_standard_cosine_t${team_size}_dim${dataset_block_dim}_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") + set(normalization_suffix "_cosine") configure_file( - 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization.cu.in" "${filename}" @ONLY ) @@ -459,24 +496,30 @@ function(generate_jit_lto_kernels target) foreach(team_size IN LISTS cagra_team_sizes) foreach(dataset_block_dim IN LISTS cagra_dataset_block_dims) foreach(pq_len IN LISTS cagra_pq_lens) - # setup_workspace_vpq Note: Metric is no longer in the kernel name - VPQ only supports # L2Expanded set(kernel_name "setup_workspace_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore set(pq_bits "${cagra_pq_bits}") + set(pq_len "${pq_len}") set(codebook_type "${cagra_codebook_type}") set(query_type "half") set(query_type_abbrev "h") + set(query_type_suffix "") + set(query_type_suffix_reg "") + set(pq_prefix "_vpq") + set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") + set(codebook_tag "tag_codebook_half") + set(codebook_tag_comma ", ") + set(impl_file "setup_workspace_vpq_impl.cuh") set(data_type "${data_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace.cu.in" "${filename}" @ONLY ) @@ -488,24 +531,30 @@ function(generate_jit_lto_kernels target) EMBEDDED_ARRAY "embedded_${kernel_name}" ) - # compute_distance_vpq Note: Metric is no longer in the kernel name - VPQ only supports # L2Expanded set(kernel_name 
"compute_distance_vpq_t${team_size}_dim${dataset_block_dim}_${cagra_pq_bits}pq_${pq_len}subd_${type_abbrev}_${cagra_index_abbrev}_${cagra_distance_abbrev}" ) set(filename "${generated_kernels_dir}/cagra_device_functions/fatbin_${kernel_name}.cu") - # VPQ only supports L2Expanded, but we don't need to pass metric to the template anymore set(pq_bits "${cagra_pq_bits}") + set(pq_len "${pq_len}") set(codebook_type "${cagra_codebook_type}") set(query_type "half") set(query_type_abbrev "h") + set(query_type_suffix "") + set(query_type_suffix_reg "") + set(pq_prefix "_vpq") + set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") + set(codebook_tag "tag_codebook_half") + set(codebook_tag_comma ", ") + set(impl_file "compute_distance_vpq_impl.cuh") set(idx_abbrev "${cagra_index_abbrev}") set(dist_abbrev "${cagra_distance_abbrev}") set(data_type "${data_type}") set(index_type "${cagra_index_type}") set(distance_type "${cagra_distance_type}") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance.cu.in" "${filename}" @ONLY ) @@ -579,6 +628,7 @@ function(generate_jit_lto_kernels target) set(pq_suffix "") set(pq_prefix "") set(codebook_tag "") + set(codebook_tag_comma "") set(query_type "${query_type}") set(query_type_abbrev "${query_type_abbrev}") set(index_type "${cagra_index_type}") @@ -613,6 +663,7 @@ function(generate_jit_lto_kernels target) set(pq_suffix "") set(pq_prefix "") set(codebook_tag "") + set(codebook_tag_comma "") set(query_type "${query_type}") set(query_type_abbrev "${query_type_abbrev}") set(index_type "${cagra_index_type}") @@ -677,7 +728,8 @@ function(generate_jit_lto_kernels target) set(codebook_type "${cagra_codebook_type}") set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "") - set(codebook_tag ", tag_codebook_half") + set(codebook_tag "tag_codebook_half") + set(codebook_tag_comma ", ") 
set(query_type "half") set(query_type_abbrev "h") set(index_type "${cagra_index_type}") @@ -715,7 +767,8 @@ function(generate_jit_lto_kernels target) set(codebook_type "${cagra_codebook_type}") set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "") - set(codebook_tag ", tag_codebook_half") + set(codebook_tag "tag_codebook_half") + set(codebook_tag_comma ", ") set(query_type "half") set(query_type_abbrev "h") set(index_type "${cagra_index_type}") @@ -783,6 +836,7 @@ function(generate_jit_lto_kernels target) set(pq_suffix "") set(pq_prefix "") set(codebook_tag "") + set(codebook_tag_comma "") set(query_type "${query_type}") set(query_type_abbrev "${query_type_abbrev}") set(index_type "${cagra_index_type}") @@ -833,7 +887,8 @@ function(generate_jit_lto_kernels target) set(codebook_type "${cagra_codebook_type}") set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "_vpq") - set(codebook_tag ", tag_codebook_half") + set(codebook_tag "tag_codebook_half") + set(codebook_tag_comma ", ") set(query_type "half") set(query_type_abbrev "h") set(index_type "${cagra_index_type}") @@ -892,6 +947,7 @@ function(generate_jit_lto_kernels target) set(pq_suffix "") set(pq_prefix "") set(codebook_tag "") + set(codebook_tag_comma "") set(query_type "${query_type}") set(query_type_abbrev "${query_type_abbrev}") set(index_type "${cagra_index_type}") @@ -927,6 +983,7 @@ function(generate_jit_lto_kernels target) set(pq_suffix "") set(pq_prefix "") set(codebook_tag "") + set(codebook_tag_comma "") set(query_type "${query_type}") set(query_type_abbrev "${query_type_abbrev}") set(index_type "${cagra_index_type}") @@ -973,7 +1030,8 @@ function(generate_jit_lto_kernels target) set(codebook_type "${cagra_codebook_type}") set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "_vpq") - set(codebook_tag ", tag_codebook_half") + set(codebook_tag "tag_codebook_half") + set(codebook_tag_comma ", ") set(query_type "half") set(query_type_abbrev "h") set(index_type 
"${cagra_index_type}") @@ -1010,7 +1068,8 @@ function(generate_jit_lto_kernels target) set(codebook_type "${cagra_codebook_type}") set(pq_suffix "_${cagra_pq_bits}pq_${pq_len}subd") set(pq_prefix "_vpq") - set(codebook_tag ", tag_codebook_half") + set(codebook_tag "tag_codebook_half") + set(codebook_tag_comma ", ") set(query_type "half") set(query_type_abbrev "h") set(index_type "${cagra_index_type}") diff --git a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.hpp b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.hpp index a5bb9b9162..efedf2ba91 100644 --- a/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.hpp +++ b/cpp/include/cuvs/detail/jit_lto/FragmentDatabase.hpp @@ -6,6 +6,7 @@ #pragma once #include +#include #include #include @@ -39,6 +40,7 @@ class FragmentDatabase { std::unique_ptr&& program, std::size_t size); + mutable std::mutex cache_mutex_; std::unordered_map> cache; }; diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 8dda21c69a..478a4c87c1 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -36,20 +36,10 @@ AlgorithmLauncher& AlgorithmLauncher::operator=(AlgorithmLauncher&& other) noexc void AlgorithmLauncher::call( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) { - if (kernel == nullptr) { RAFT_FAIL("AlgorithmLauncher::call - kernel is NULL!"); } - if (library == nullptr) { RAFT_FAIL("AlgorithmLauncher::call - library is NULL!"); } - if (kernel_args == nullptr) { RAFT_FAIL("AlgorithmLauncher::call - kernel_args is NULL!"); } - - cudaLaunchAttribute attribute[1]; - attribute[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; - attribute[0].val.programmaticStreamSerializationAllowed = 1; - cudaLaunchConfig_t config; config.gridDim = grid; config.blockDim = block; config.stream = stream; - config.attrs = attribute; - config.numAttrs = 1; config.dynamicSmemBytes = shared_mem; 
RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); @@ -58,18 +48,10 @@ void AlgorithmLauncher::call( void AlgorithmLauncher::call_cooperative( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) { - cudaLaunchAttribute attributes[2]; - attributes[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; - attributes[0].val.programmaticStreamSerializationAllowed = 1; - attributes[1].id = cudaLaunchAttributeCooperative; - attributes[1].val.cooperative = 1; - cudaLaunchConfig_t config; config.gridDim = grid; config.blockDim = block; config.stream = stream; - config.attrs = attributes; - config.numAttrs = 2; config.dynamicSmemBytes = shared_mem; RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 0ae28d3ced..4e1e24d6dd 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -63,7 +63,7 @@ std::shared_ptr AlgorithmPlanner::get_launcher() log_message += device_function + ","; } log_message.pop_back(); - RAFT_LOG_DEBUG("%s", log_message.c_str()); + RAFT_LOG_INFO("%s", log_message.c_str()); launchers[launch_key] = this->build(); } return launchers[launch_key]; diff --git a/cpp/src/detail/jit_lto/FragmentDatabase.cu b/cpp/src/detail/jit_lto/FragmentDatabase.cu index 0322c978e3..efe7139aa2 100644 --- a/cpp/src/detail/jit_lto/FragmentDatabase.cu +++ b/cpp/src/detail/jit_lto/FragmentDatabase.cu @@ -9,6 +9,8 @@ #include #include +#include + FragmentDatabase::FragmentDatabase() {} bool FragmentDatabase::make_cache_entry(std::string const& key) @@ -50,17 +52,23 @@ void registerFatbinFragment(std::string const& algo, auto& planner = fragment_database(); std::string key = algo; if (!params.empty()) { key += "_" + params; } - auto entry_exists = planner.make_cache_entry(key); - if (entry_exists) { return; } - planner.cache[key] = std::make_unique(key, blob, 
size); + { + std::lock_guard lock(planner.cache_mutex_); + auto entry_exists = planner.make_cache_entry(key); + if (entry_exists) { return; } + planner.cache[key] = std::make_unique(key, blob, size); + } } void registerNVRTCFragment(std::string const& key, std::unique_ptr&& program, std::size_t size) { - auto& planner = fragment_database(); - auto entry_exists = planner.make_cache_entry(key); - if (entry_exists) { return; } - planner.cache[key] = std::make_unique(key, std::move(program), size); + auto& planner = fragment_database(); + { + std::lock_guard lock(planner.cache_mutex_); + auto entry_exists = planner.make_cache_entry(key); + if (entry_exists) { return; } + planner.cache[key] = std::make_unique(key, std::move(program), size); + } } diff --git a/cpp/src/neighbors/detail/cagra/compute_distance.hpp b/cpp/src/neighbors/detail/cagra/compute_distance.hpp index 2466795514..5309831b2a 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/src/neighbors/detail/cagra/compute_distance.hpp @@ -246,15 +246,32 @@ struct dataset_descriptor_host { std::atomic ready; // Not sure if std::holds_alternative is thread-safe std::variant value; + state() : ready{false} + { + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::state() constructor - this=%p", + static_cast(this)); + } + template state(InitF init, size_t size) : ready{false}, value{std::make_tuple(init, size)} { + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] state::state(InitF, size_t) constructor - this=%p, size=%zu", + static_cast(this), + size); } ~state() noexcept { if (std::holds_alternative(value)) { auto& [ptr, stream] = std::get(value); + RAFT_LOG_INFO("[STREAM DEBUG] state::~state() - freeing ptr=%p on stream=%p", + static_cast(ptr), + static_cast(stream)); + // Synchronize the stream before freeing to ensure all kernels using this descriptor have + // completed This prevents use-after-free if kernels are still running when the destructor + // is called + 
RAFT_CUDA_TRY_NO_THROW(cudaStreamSynchronize(stream)); RAFT_CUDA_TRY_NO_THROW(cudaFreeAsync(ptr, stream)); } } @@ -265,17 +282,45 @@ struct dataset_descriptor_host { if (std::holds_alternative(value)) { auto& [fun, size] = std::get(value); dev_descriptor_t* ptr = nullptr; + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() - allocating %zu bytes", size); RAFT_CUDA_TRY(cudaMallocAsync(&ptr, size, stream)); - fun(ptr, stream); - value = std::make_tuple(ptr, stream); - ready.store(true, std::memory_order_release); + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() - allocated ptr=%p", + static_cast(ptr)); + try { + fun(ptr, stream); + value = std::make_tuple(ptr, stream); + ready.store(true, std::memory_order_release); + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() - initialization complete, ready=true"); + } catch (...) { + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() - exception caught, freeing ptr=%p", + static_cast(ptr)); + // If fun() throws, free the allocated memory before rethrowing + RAFT_CUDA_TRY_NO_THROW(cudaFreeAsync(ptr, stream)); + throw; + } + } else { + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() - already initialized, skipping"); } } auto get(rmm::cuda_stream_view stream) -> dev_descriptor_t* { - if (!ready.load(std::memory_order_acquire)) { eval(stream); } - return std::get<0>(std::get(value)); + bool was_ready = ready.load(std::memory_order_acquire); + if (!was_ready) { + eval(stream); + // After eval(), value must be in ready_t state (either from this call or a concurrent one) + // If eval() threw, we won't reach here + was_ready = ready.load(std::memory_order_acquire); + } + // Only access value if we're sure it's in ready_t state + if (!was_ready || !std::holds_alternative(value)) { + RAFT_FAIL("Descriptor state is invalid - eval() must have failed"); + } + auto* ptr = std::get<0>(std::get(value)); + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::get() - was_ready=%d, ptr=%p", + was_ready, + static_cast(ptr)); + return ptr; } }; diff 
--git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in index 57f5c3938b..1cbb04acbf 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in @@ -11,7 +11,6 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -// Instantiate the apply_filter_kernel_jit function with concrete types template __global__ void apply_filter_kernel_jit<@index_type@, @distance_type@, @source_index_type@>( const @source_index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, const std::uint32_t, const @index_type@, uint32_t*, @source_index_type@, @source_index_type@); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization.cu.in similarity index 61% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization.cu.in index 60ecb02203..7e0e09a5cc 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization.cu.in @@ -7,7 +7,7 @@ #ifdef BUILD_KERNEL -#include +#include namespace cuvs::neighbors::cagra::detail { @@ -21,20 +21,20 @@ template __device__ @distance_type@ apply_normalization_standard<@team_size@, @d #include #include -#include "apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" +#include "apply_normalization_standard@normalization_suffix@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void 
register_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +__attribute__((__constructor__)) static void register_apply_normalization_standard@normalization_suffix@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() { using QueryTag = cuvs::neighbors::cagra::detail::tag_f; registerAlgorithm( - "apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@", - embedded_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); + "apply_normalization_standard@normalization_suffix@_t@team_size@_dim@dataset_block_dim@", + embedded_apply_normalization_standard@normalization_suffix@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, + sizeof(embedded_apply_normalization_standard@normalization_suffix@_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in deleted file mode 100644 index ae1fa2fbf0..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( - @distance_type@, const args_t, @index_type@); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; - registerAlgorithm( - "apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@", - embedded_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp new file mode 100644 index 0000000000..5f6242287c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp @@ -0,0 +1,135 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail { + +template +struct CagraPlannerBase : AlgorithmPlanner { + using AlgorithmPlanner::device_functions; + + CagraPlannerBase(const std::string& entrypoint, const std::string& params) + : AlgorithmPlanner(entrypoint, params) + { + } + void add_setup_workspace_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + { + std::string key = "setup_workspace_"; + if (is_vpq) { + key += "vpq_"; + auto params = make_fragment_key(); + key += "t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + if (!params.empty()) { key += "_" + params; } + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] setup_workspace VPQ key: %s (params: %s)", + key.c_str(), + params.c_str()); + } else { + key += "standard_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + auto params = make_fragment_key(); + if (!params.empty()) { key += "_" + params; } + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] setup_workspace standard key: %s (params: %s)", + key.c_str(), + params.c_str()); + } + this->device_functions.push_back(key); + } + + void add_compute_distance_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq, + uint32_t pq_bits = 0, + uint32_t pq_len = 0) + { + if (is_vpq) { + std::string key = "compute_distance_vpq_"; + auto params = make_fragment_key(); + key += "t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; + if (!params.empty()) { key += "_" + params; } + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] compute_distance VPQ 
key: %s (params: %s)", + key.c_str(), + params.c_str()); + this->device_functions.push_back(key); + } else { + std::string key = "compute_distance_standard_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + auto params = make_fragment_key(); + if (!params.empty()) { key += "_" + params; } + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] compute_distance standard key: %s (params: %s)", + key.c_str(), + params.c_str()); + this->device_functions.push_back(key); + add_dist_op_device_function(metric); + add_normalization_device_function(metric, team_size, dataset_block_dim); + } + } + + void add_dist_op_device_function(cuvs::distance::DistanceType metric) + { + std::string metric_tag; + switch (metric) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2Unexpanded: metric_tag = "l2"; break; + case cuvs::distance::DistanceType::InnerProduct: metric_tag = "inner_product"; break; + case cuvs::distance::DistanceType::CosineExpanded: metric_tag = "inner_product"; break; + case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; + default: metric_tag = "unknown"; break; + } + auto params = make_fragment_key(); + std::string key = "dist_op_" + metric_tag; + if (!params.empty()) { key += "_" + params; } + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] dist_op key: %s (params: %s)", key.c_str(), params.c_str()); + this->device_functions.push_back(key); + } + + void add_normalization_device_function(cuvs::distance::DistanceType metric, + uint32_t team_size, + uint32_t dataset_block_dim) + { + std::string normalization_type; + if (metric == cuvs::distance::DistanceType::CosineExpanded) { + normalization_type = "cosine"; + } else { + normalization_type = "noop"; + } + auto params = make_fragment_key(); + std::string key = "apply_normalization_standard_" + normalization_type; + key += "_t" + std::to_string(team_size); + key += "_dim" + std::to_string(dataset_block_dim); + if (!params.empty()) { key += "_" + 
params; } + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] apply_normalization key: %s (params: %s)", key.c_str(), params.c_str()); + this->device_functions.push_back(key); + } + + void add_sample_filter_device_function(std::string filter_name) + { + this->device_functions.push_back("sample_filter_" + filter_name); + } +}; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance.cu.in similarity index 50% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance.cu.in index 46756fe67d..e345a81b0b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance.cu.in @@ -7,12 +7,12 @@ #ifdef BUILD_KERNEL -#include +#include namespace cuvs::neighbors::cagra::detail { using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( +template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( const args_t, @index_type@); } // namespace cuvs::neighbors::cagra::detail @@ -21,20 +21,20 @@ template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block #include #include -#include "compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" +#include "compute_distance@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix@.h" using namespace 
cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +__attribute__((__constructor__)) static void register_compute_distance@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix_reg@() { using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_type_abbrev@; registerAlgorithm( - "compute_distance_standard_t@team_size@_dim@dataset_block_dim@", - embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, - sizeof(embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); + QueryTag@codebook_tag_comma@ @codebook_tag@>( + "compute_distance@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", + embedded_compute_distance@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix@, + sizeof(embedded_compute_distance@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index 0f36470ad2..1c7c3a5223 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -11,11 +11,6 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -// Instantiate the compute_distance_to_child_nodes_kernel_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) 
-// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half -// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Kernels use dataset_descriptor_base_t* pointer directly template __global__ void compute_distance_to_child_nodes_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@, cuvs::neighbors::filtering::none_sample_filter>( const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); @@ -34,7 +29,7 @@ __attribute__((__constructor__)) static void register_compute_distance_to_child_ registerAlgorithm( + tag_idx_@src_idx_abbrev@@codebook_tag_comma@ @codebook_tag@>( "compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, sizeof(embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in deleted file mode 100644 index e04e11369b..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in +++ /dev/null @@ -1,41 +0,0 @@ -/* - * 
SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( - const args_t, @index_type@); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - registerAlgorithm( - "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh index 2da95a015c..cf4de6e486 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -5,16 +5,11 @@ #pragma once -// This file contains extern function declarations for JIT LTO -// The actual descriptor type 
definitions are in -impl.cuh files which are included -// directly by the .cu.in files with CUVS_ENABLE_JIT_LTO defined -// Forward declarations must match the JIT LTO version (no Metric parameter) - -#include "../compute_distance.hpp" // For dataset_descriptor_base_t +#include "../compute_distance.hpp" #include namespace cuvs::neighbors::cagra::detail { -// Forward declarations matching the JIT LTO version (no Metric parameter, includes QueryT) + template struct cagra_q_dataset_descriptor_t; -} // namespace cuvs::neighbors::cagra::detail - -namespace cuvs::neighbors::cagra::detail { - -// All extern function declarations are in the cuvs::neighbors::cagra::detail namespace -// so they can be used by all search modes without being beholden to any specific sub-namespace -// Unified setup_workspace and compute_distance extern functions -// These take dataset_descriptor_base_t* and reconstruct the derived descriptor inside -// Standard and VPQ versions are in separate impl headers but use the same function name -// QueryT is needed to reconstruct the descriptor type correctly template extern __device__ bool sample_filter(uint32_t query_id, SourceIndexT node_id, void* filter_data); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h deleted file mode 100644 index 178ed6dd9b..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_data.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
- * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -// Use the shared filter_data.h -#include "../../jit_lto_kernels/filter_data.h" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in index eb1c8e1d29..dd1968eec1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -11,11 +11,6 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -// Instantiate the random_pickup_kernel_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half -// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Kernels use dataset_descriptor_base_t* pointer directly template __global__ void random_pickup_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); @@ -34,7 +29,7 @@ __attribute__((__constructor__)) static void register_random_pickup_kernel@pq_pr registerAlgorithm( + tag_idx_@idx_abbrev@@codebook_tag_comma@ @codebook_tag@>( "random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, 
sizeof(embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index 648d10e977..33bea062c7 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -7,18 +7,11 @@ #ifdef BUILD_KERNEL -// Include helpers first so they're available when the JIT kernel is instantiated -#include // For pickup_next_parent and topk_by_bitonic_sort_wrapper_* - +#include #include namespace cuvs::neighbors::cagra::detail::multi_cta_search { -// Instantiate the search_kernel_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half -// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Kernels use dataset_descriptor_base_t* pointer directly template __global__ void search_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@>( @index_type@* const, @distance_type@* const, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, 
@source_index_type@); @@ -37,7 +30,7 @@ __attribute__((__constructor__)) static void register_search_multi_cta_kernel@pq registerAlgorithm( + tag_idx_@src_idx_abbrev@@codebook_tag_comma@ @codebook_tag@>( "search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, sizeof(embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh index 5a8b6079eb..9bea64d857 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh @@ -23,8 +23,8 @@ #endif // Include extern function declarations before namespace so they're available to kernel definitions +#include "../../jit_lto_kernels/filter_data.h" #include "extern_device_functions.cuh" -#include "filter_data.h" // Include shared JIT device functions before namespace so they're available to kernel definitions #include "device_common_jit.cuh" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index 8966360c14..af01047be5 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -5,13 +5,10 @@ #pragma once -// Include tags header before namespace (it defines a namespace) -#include - -#include +#include "cagra_planner_base.hpp" #include +#include #include -#include #include // Use nested namespace syntax to allow 
inclusion from within parent namespace @@ -21,21 +18,23 @@ namespace cagra { namespace detail { namespace multi_cta_search { -template -struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { +template +struct CagraMultiCtaSearchPlanner + : CagraPlannerBase { CagraMultiCtaSearchPlanner(cuvs::distance::DistanceType metric, uint32_t team_size, uint32_t dataset_block_dim, bool is_vpq = false, uint32_t pq_bits = 0, uint32_t pq_len = 0) - : AlgorithmPlanner( + : CagraPlannerBase( build_entrypoint_name(metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len), - is_vpq ? make_fragment_key() + is_vpq ? make_fragment_key() : make_fragment_key()), entrypoint_name_( build_entrypoint_name(metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len)) @@ -44,125 +43,6 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { const std::string& get_entrypoint_name() const { return entrypoint_name_; } - void add_setup_workspace_device_function(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits = 0, - uint32_t pq_len = 0) - { - std::string key = "setup_workspace_"; - if (is_vpq) { - key += "vpq_"; - using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - auto params = make_fragment_key(); - key += "t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - key += "_" + params; - } else { - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; - key += "standard_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - auto params = make_fragment_key(); - key += "_" + params; - } - this->device_functions.push_back(key); - } - - void add_compute_distance_device_function(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits = 
0, - uint32_t pq_len = 0) - { - if (is_vpq) { - std::string key = "compute_distance_vpq_"; - using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - auto params = make_fragment_key(); - key += "t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - key += "_" + params; - this->device_functions.push_back(key); - } else { - std::string key = "compute_distance_standard_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - if (metric == cuvs::distance::DistanceType::BitwiseHamming) { - using tag_uc = cuvs::neighbors::cagra::detail::tag_uc; - if constexpr (std::is_same_v) { - auto params = make_fragment_key(); - key += "_" + params; - } else { - auto params = make_fragment_key(); - key += "_" + params; - } - } else { - auto params = make_fragment_key(); - key += "_" + params; - } - this->device_functions.push_back(key); - add_dist_op_device_function(metric); - add_normalization_device_function(metric, team_size, dataset_block_dim); - } - } - - void add_dist_op_device_function(cuvs::distance::DistanceType metric) - { - std::string metric_tag; - switch (metric) { - case cuvs::distance::DistanceType::L2Expanded: - case cuvs::distance::DistanceType::L2Unexpanded: metric_tag = "l2"; break; - case cuvs::distance::DistanceType::InnerProduct: metric_tag = "inner_product"; break; - case cuvs::distance::DistanceType::CosineExpanded: - metric_tag = "inner_product"; // CosineExpanded uses inner_product dist_op - break; - case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; - default: metric_tag = "unknown"; break; - } - std::string params; - if (metric == cuvs::distance::DistanceType::BitwiseHamming) { - params = make_fragment_key(); - } else { - params = make_fragment_key(); - } - std::string key = "dist_op_" + metric_tag + "_" + 
params; - this->device_functions.push_back(key); - } - - void add_normalization_device_function(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim) - { - std::string normalization_type; - if (metric == cuvs::distance::DistanceType::CosineExpanded) { - normalization_type = "cosine"; - } else { - normalization_type = "noop"; - } - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; - auto params = make_fragment_key(); - std::string key = "apply_normalization_standard_" + normalization_type; - key += "_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - key += "_" + params; - this->device_functions.push_back(key); - } - - void add_sample_filter_device_function(std::string filter_name) - { - this->device_functions.push_back("sample_filter_" + filter_name); - } - private: std::string entrypoint_name_; @@ -180,18 +60,6 @@ struct CagraMultiCtaSearchPlanner : AlgorithmPlanner { if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } return name; } - - static std::string metric_to_string(cuvs::distance::DistanceType metric) - { - switch (metric) { - case cuvs::distance::DistanceType::L2Expanded: - case cuvs::distance::DistanceType::L2Unexpanded: return "L2Expanded"; - case cuvs::distance::DistanceType::InnerProduct: return "InnerProduct"; - case cuvs::distance::DistanceType::CosineExpanded: return "CosineExpanded"; - case cuvs::distance::DistanceType::BitwiseHamming: return "BitwiseHamming"; - default: return "Unknown"; - } - } }; } // namespace multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh index 6368a21f28..08bea3fa26 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh @@ -18,8 +18,8 @@ #include // For 
std::is_same_v, std::true_type, std::false_type // Include extern function declarations before namespace so they're available to kernel definitions +#include "../../jit_lto_kernels/filter_data.h" #include "extern_device_functions.cuh" -#include "filter_data.h" namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index 550101dbab..84f30e5ba7 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -5,21 +5,24 @@ #pragma once -// Include tags header before namespace (it defines a namespace) -#include - -#include +#include "cagra_planner_base.hpp" #include +#include #include -#include #include // Use nested namespace syntax to allow inclusion from within parent namespace namespace cuvs::neighbors::cagra::detail { namespace multi_kernel_search { -template -struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { +template +struct CagraMultiKernelSearchPlanner + : CagraPlannerBase { CagraMultiKernelSearchPlanner(cuvs::distance::DistanceType metric, const std::string& kernel_name, uint32_t team_size, @@ -27,19 +30,16 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { bool is_vpq = false, uint32_t pq_bits = 0, uint32_t pq_len = 0) - : AlgorithmPlanner( + : CagraPlannerBase( build_entrypoint_name( kernel_name, metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len), // Special case: apply_filter_kernel doesn't use DataTag, only IndexTag, DistanceTag, // SourceIndexTag (kernel_name == "apply_filter_kernel") ? make_fragment_key() - : (is_vpq ? make_fragment_key() - : make_fragment_key())), + : (is_vpq + ? 
make_fragment_key() + : make_fragment_key())), entrypoint_name_(build_entrypoint_name( kernel_name, metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len)) { @@ -47,125 +47,6 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { const std::string& get_entrypoint_name() const { return entrypoint_name_; } - void add_setup_workspace_device_function(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits = 0, - uint32_t pq_len = 0) - { - std::string key = "setup_workspace_"; - if (is_vpq) { - key += "vpq_"; - using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - auto params = make_fragment_key(); - key += "t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - key += "_" + params; - } else { - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; - key += "standard_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - auto params = make_fragment_key(); - key += "_" + params; - } - this->device_functions.push_back(key); - } - - void add_compute_distance_device_function(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits = 0, - uint32_t pq_len = 0) - { - if (is_vpq) { - std::string key = "compute_distance_vpq_"; - using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - auto params = make_fragment_key(); - key += "t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - key += "_" + params; - this->device_functions.push_back(key); - } else { - std::string key = "compute_distance_standard_t" + 
std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - if (metric == cuvs::distance::DistanceType::BitwiseHamming) { - using tag_uc = cuvs::neighbors::cagra::detail::tag_uc; - if constexpr (std::is_same_v) { - auto params = make_fragment_key(); - key += "_" + params; - } else { - auto params = make_fragment_key(); - key += "_" + params; - } - } else { - auto params = make_fragment_key(); - key += "_" + params; - } - this->device_functions.push_back(key); - add_dist_op_device_function(metric); - add_normalization_device_function(metric, team_size, dataset_block_dim); - } - } - - void add_dist_op_device_function(cuvs::distance::DistanceType metric) - { - std::string metric_tag; - switch (metric) { - case cuvs::distance::DistanceType::L2Expanded: - case cuvs::distance::DistanceType::L2Unexpanded: metric_tag = "l2"; break; - case cuvs::distance::DistanceType::InnerProduct: metric_tag = "inner_product"; break; - case cuvs::distance::DistanceType::CosineExpanded: - metric_tag = "inner_product"; // CosineExpanded uses inner_product dist_op - break; - case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; - default: metric_tag = "unknown"; break; - } - std::string params; - if (metric == cuvs::distance::DistanceType::BitwiseHamming) { - params = make_fragment_key(); - } else { - params = make_fragment_key(); - } - std::string key = "dist_op_" + metric_tag + "_" + params; - this->device_functions.push_back(key); - } - - void add_normalization_device_function(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim) - { - std::string normalization_type; - if (metric == cuvs::distance::DistanceType::CosineExpanded) { - normalization_type = "cosine"; - } else { - normalization_type = "noop"; - } - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; - auto params = make_fragment_key(); - std::string key = "apply_normalization_standard_" + normalization_type; - key += "_t" + 
std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - key += "_" + params; - this->device_functions.push_back(key); - } - - void add_sample_filter_device_function(std::string filter_name) - { - this->device_functions.push_back("sample_filter_" + filter_name); - } - void set_entrypoint_name(const std::string& name) { entrypoint_name_ = name; } private: @@ -188,18 +69,6 @@ struct CagraMultiKernelSearchPlanner : AlgorithmPlanner { if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } return name; } - - static std::string metric_to_string(cuvs::distance::DistanceType metric) - { - switch (metric) { - case cuvs::distance::DistanceType::L2Expanded: - case cuvs::distance::DistanceType::L2Unexpanded: return "L2Expanded"; - case cuvs::distance::DistanceType::InnerProduct: return "InnerProduct"; - case cuvs::distance::DistanceType::CosineExpanded: return "CosineExpanded"; - case cuvs::distance::DistanceType::BitwiseHamming: return "BitwiseHamming"; - default: return "Unknown"; - } - } }; } // namespace multi_kernel_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in index e7d314e505..af33f7c229 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -11,11 +11,6 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { -// Instantiate the search_kernel_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half -// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Kernels use dataset_descriptor_base_t* pointer directly 
template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@>( uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); @@ -34,7 +29,7 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_@ registerAlgorithm( + tag_idx_@src_idx_abbrev@@codebook_tag_comma@ @codebook_tag@>( "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, sizeof(embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh index c9b42fffc8..fcc851b3b5 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh @@ -61,8 +61,8 @@ #include // Include extern function declarations before namespace so they're available to kernel definitions +#include "../../jit_lto_kernels/filter_data.h" #include "extern_device_functions.cuh" -#include "filter_data.h" // Include shared JIT device functions #include "device_common_jit.cuh" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in index 1cbaddd41d..dd7d7c709e 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in @@ -11,11 +11,6 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { -// Instantiate the search_kernel_p_jit function with unified template parameters -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for BitwiseHamming) -// For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half -// Note: Metric is no longer a template parameter - it's linked via dist_op and normalization fragments -// Note: Kernels use dataset_descriptor_base_t* pointer directly template __global__ void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@>( worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, 
const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); @@ -34,7 +29,7 @@ __attribute__((__constructor__)) static void register_search_single_cta_kernel_p registerAlgorithm( + tag_idx_@src_idx_abbrev@@codebook_tag_comma@ @codebook_tag@>( "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, sizeof(embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index 8a75b4ed48..9cd3b41e73 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -5,11 +5,9 @@ #pragma once -#include - -#include -#include +#include "cagra_planner_base.hpp" #include +#include #include #include @@ -19,153 +17,36 @@ namespace cagra { namespace detail { namespace single_cta_search { -template -struct CagraSearchPlanner : AlgorithmPlanner { - CagraSearchPlanner(cuvs::distance::DistanceType metric, - bool topk_by_bitonic_sort, - bool bitonic_sort_and_merge_multi_warps, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq = false, - uint32_t pq_bits = 0, - uint32_t pq_len = 0, - bool persistent = false) - : 
AlgorithmPlanner(build_entrypoint_name(metric, - topk_by_bitonic_sort, - bitonic_sort_and_merge_multi_warps, - team_size, - dataset_block_dim, - is_vpq, - pq_bits, - pq_len, - persistent), - is_vpq - ? make_fragment_key() - : make_fragment_key()) - { - } - - void add_setup_workspace_device_function(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits = 0, - uint32_t pq_len = 0) - { - std::string key = "setup_workspace_"; - if (is_vpq) { - key += "vpq_"; - using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - auto params = make_fragment_key(); - key += "t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - key += "_" + params; - } else { - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; - key += "standard_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - auto params = make_fragment_key(); - key += "_" + params; - } - this->device_functions.push_back(key); - } - - void add_compute_distance_device_function(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits = 0, - uint32_t pq_len = 0) +template +struct CagraSingleCtaSearchPlanner + : CagraPlannerBase { + CagraSingleCtaSearchPlanner(cuvs::distance::DistanceType metric, + bool topk_by_bitonic_sort, + bool bitonic_sort_and_merge_multi_warps, + uint32_t team_size, + uint32_t dataset_block_dim, + bool is_vpq = false, + uint32_t pq_bits = 0, + uint32_t pq_len = 0, + bool persistent = false) + : CagraPlannerBase( + build_entrypoint_name(metric, + topk_by_bitonic_sort, + bitonic_sort_and_merge_multi_warps, + team_size, + dataset_block_dim, + is_vpq, + pq_bits, + pq_len, + persistent), + is_vpq ? 
make_fragment_key() + : make_fragment_key()) { - if (is_vpq) { - std::string key = "compute_distance_vpq_"; - using CodebookTag = cuvs::neighbors::cagra::detail::tag_codebook_half; - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - auto params = make_fragment_key(); - key += "t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; - key += "_" + params; - this->device_functions.push_back(key); - } else { - std::string key = "compute_distance_standard_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - if (metric == cuvs::distance::DistanceType::BitwiseHamming) { - using tag_uc = cuvs::neighbors::cagra::detail::tag_uc; - if constexpr (std::is_same_v) { - auto params = make_fragment_key(); - key += "_" + params; - } else { - auto params = make_fragment_key(); - key += "_" + params; - } - } else { - auto params = make_fragment_key(); - key += "_" + params; - } - this->device_functions.push_back(key); - add_dist_op_device_function(metric); - add_normalization_device_function(metric, team_size, dataset_block_dim); - } - } - - void add_dist_op_device_function(cuvs::distance::DistanceType metric) - { - std::string metric_tag; - switch (metric) { - case cuvs::distance::DistanceType::L2Expanded: - case cuvs::distance::DistanceType::L2Unexpanded: metric_tag = "l2"; break; - case cuvs::distance::DistanceType::InnerProduct: metric_tag = "inner_product"; break; - case cuvs::distance::DistanceType::CosineExpanded: - metric_tag = "inner_product"; // CosineExpanded uses inner_product dist_op - break; - case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; - default: metric_tag = "unknown"; break; - } - std::string params; - if (metric == cuvs::distance::DistanceType::BitwiseHamming) { - params = make_fragment_key(); - } else { - params = make_fragment_key(); - } - std::string key = "dist_op_" + 
metric_tag + "_" + params; - this->device_functions.push_back(key); - } - - void add_normalization_device_function(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim) - { - std::string normalization_type; - if (metric == cuvs::distance::DistanceType::CosineExpanded) { - normalization_type = "cosine"; - } else { - normalization_type = "noop"; - } - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; - auto params = make_fragment_key(); - std::string key = "apply_normalization_standard_" + normalization_type; - key += "_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); - key += "_" + params; - this->device_functions.push_back(key); - } - - void add_sample_filter_device_function(std::string filter_name) - { - this->device_functions.push_back("sample_filter_" + filter_name); } private: @@ -189,18 +70,6 @@ struct CagraSearchPlanner : AlgorithmPlanner { } static std::string bool_to_string(bool b) { return b ? "true" : "false"; } - - static std::string metric_to_string(cuvs::distance::DistanceType metric) - { - switch (metric) { - case cuvs::distance::DistanceType::L2Expanded: - case cuvs::distance::DistanceType::L2Unexpanded: return "L2Expanded"; - case cuvs::distance::DistanceType::InnerProduct: return "InnerProduct"; - case cuvs::distance::DistanceType::CosineExpanded: return "CosineExpanded"; - case cuvs::distance::DistanceType::BitwiseHamming: return "BitwiseHamming"; - default: return "Unknown"; - } - } }; } // namespace single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace.cu.in similarity index 51% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace.cu.in index a7b75ec067..ec97659140 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace.cu.in @@ -7,11 +7,11 @@ #ifdef BUILD_KERNEL -#include +#include namespace cuvs::neighbors::cagra::detail { -template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( +template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail @@ -20,20 +20,20 @@ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@d #include #include -#include "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" +#include "setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix@.h" using namespace cuvs::neighbors::cagra::detail; -__attribute__((__constructor__)) static void register_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() +__attribute__((__constructor__)) static void register_setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix_reg@() { using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_type_abbrev@; registerAlgorithm( - "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@", - 
embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, - sizeof(embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); + QueryTag@codebook_tag_comma@ @codebook_tag@>( + "setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", + embedded_setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix@, + sizeof(embedded_setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix@)); } #endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in deleted file mode 100644 index ac2b791f4a..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( - cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - registerAlgorithm( - "setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 0329e68d56..e2d54f234b 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -60,6 +60,20 @@ void select_and_run_jit( SampleFilterT sample_filter, cudaStream_t stream) { + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] select_and_run_jit (multi_cta) - is_vpq=%d, metric=%d, team_size=%u, " + "dataset_block_dim=%u, pq_bits=%u, 
pq_len=%u, queries_ptr=%p, topk_indices_ptr=%p, " + "topk_distances_ptr=%p", + dataset_desc.is_vpq, + static_cast(dataset_desc.metric), + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.pq_bits, + dataset_desc.pq_len, + static_cast(queries_ptr), + static_cast(topk_indices_ptr), + static_cast(topk_distances_ptr)); + // Extract bitset data from filter object (if it's a bitset_filter) uint32_t* bitset_ptr = nullptr; SourceIndexT bitset_len = 0; @@ -89,48 +103,110 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - // For multi_cta, we don't use topk_by_bitonic_sort or bitonic_sort_and_merge_multi_warps - // These are handled inside the kernel based on max_elements - // We need to construct the entrypoint name manually since it's different from single_cta - std::string metric_name_full; - if (dataset_desc.metric == cuvs::distance::DistanceType::L2Expanded) { - metric_name_full = "L2Expanded"; - } else if (dataset_desc.metric == cuvs::distance::DistanceType::InnerProduct) { - metric_name_full = "InnerProduct"; - } else if (dataset_desc.metric == cuvs::distance::DistanceType::CosineExpanded) { - metric_name_full = "CosineExpanded"; - } else { - RAFT_FAIL("Unsupported metric for multi_cta JIT kernel"); - } + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] multi_cta - DataTag=%s, IndexTag=%s, DistTag=%s, SourceTag=%s", + typeid(DataTag).name(), + typeid(IndexTag).name(), + typeid(DistTag).name(), + typeid(SourceTag).name()); // Create planner and register device functions // Pass team_size, dataset_block_dim, and VPQ parameters to match the kernel entrypoint name - CagraMultiCtaSearchPlanner planner( - dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - - planner.add_setup_workspace_device_function(dataset_desc.metric, - dataset_desc.team_size, - 
dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - planner.add_compute_distance_device_function(dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - std::string filter_name = get_sample_filter_name(); - planner.add_sample_filter_device_function(filter_name); - - // Get launcher using the planner's entrypoint name and fragment key - auto params = make_fragment_key(); - auto launcher = planner.get_launcher(); + std::shared_ptr launcher; + if (dataset_desc.is_vpq) { + using QueryTag = query_type_tag_vpq_t; + using CodebookTag = codebook_tag_vpq_t; + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] multi_cta VPQ path - QueryTag=%s, CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraMultiCtaSearchPlanner + planner(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + std::string filter_name = get_sample_filter_name(); + planner.add_sample_filter_device_function(filter_name); + launcher = planner.get_launcher(); + } else { + using CodebookTag = codebook_tag_standard_t; + if (dataset_desc.metric == cuvs::distance::DistanceType::BitwiseHamming) { + using QueryTag = + query_type_tag_standard_t; + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] multi_cta Standard path (BitwiseHamming) - QueryTag=%s, " + "CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraMultiCtaSearchPlanner + 
planner(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + std::string filter_name = get_sample_filter_name(); + planner.add_sample_filter_device_function(filter_name); + launcher = planner.get_launcher(); + } else { + using QueryTag = query_type_tag_standard_t; + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] multi_cta Standard path (non-BitwiseHamming) - QueryTag=%s, " + "CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraMultiCtaSearchPlanner + planner(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + std::string filter_name = get_sample_filter_name(); + planner.add_sample_filter_device_function(filter_name); + launcher = planner.get_launcher(); + } + } if (!launcher) { RAFT_FAIL("Failed to get JIT launcher"); } @@ -161,9 +237,12 @@ void select_and_run_jit( dim3 grid_dims(num_cta_per_query, num_queries, 1); // Get the device descriptor pointer + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] multi_cta About to call dev_ptr()"); const dataset_descriptor_base_t* 
dev_desc_base = dataset_desc.dev_ptr(stream); const auto* dev_desc = dev_desc_base; + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] multi_cta dev_ptr() returned: %p", + static_cast(dev_desc)); // Note: dataset_desc is passed by const reference, so it stays alive for the duration of this // function The descriptor's state is managed by a shared_ptr internally, so no need to explicitly diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh index db0fdf940f..6a913bd7e8 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -49,34 +49,115 @@ void random_pickup_jit(const dataset_descriptor_host& std::uint32_t hash_bitlen, cudaStream_t cuda_stream) { + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] random_pickup_jit - is_vpq=%d, metric=%d, team_size=%u, " + "dataset_block_dim=%u, pq_bits=%u, pq_len=%u, queries_ptr=%p", + dataset_desc.is_vpq, + static_cast(dataset_desc.metric), + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.pq_bits, + dataset_desc.pq_len, + static_cast(queries_ptr)); + // Create planner with tags using DataTag = decltype(get_data_type_tag()); using IndexTag = decltype(get_index_type_tag()); using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); // Use IndexT for source + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] random_pickup - DataTag=%s, IndexTag=%s, DistTag=%s, SourceTag=%s", + typeid(DataTag).name(), + typeid(IndexTag).name(), + typeid(DistTag).name(), + typeid(SourceTag).name()); + // Create planner and register device functions - CagraMultiKernelSearchPlanner planner( - dataset_desc.metric, - "random_pickup_kernel", - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - 
planner.add_setup_workspace_device_function(dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - planner.add_compute_distance_device_function(dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - auto launcher = planner.get_launcher(); + std::shared_ptr launcher; + if (dataset_desc.is_vpq) { + using QueryTag = query_type_tag_vpq_t; + using CodebookTag = codebook_tag_vpq_t; + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] random_pickup VPQ path - QueryTag=%s, CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraMultiKernelSearchPlanner + planner(dataset_desc.metric, + "random_pickup_kernel", + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + launcher = planner.get_launcher(); + } else { + using CodebookTag = codebook_tag_standard_t; + if (dataset_desc.metric == cuvs::distance::DistanceType::BitwiseHamming) { + using QueryTag = + query_type_tag_standard_t; + CagraMultiKernelSearchPlanner + planner(dataset_desc.metric, + "random_pickup_kernel", + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + 
planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + launcher = planner.get_launcher(); + } else { + using QueryTag = query_type_tag_standard_t; + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] random_pickup Standard path (non-BitwiseHamming) - QueryTag=%s, " + "CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraMultiKernelSearchPlanner + planner(dataset_desc.metric, + "random_pickup_kernel", + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + launcher = planner.get_launcher(); + } + } const auto block_size = 256u; const auto num_teams_per_threadblock = block_size / dataset_desc.team_size; @@ -84,7 +165,10 @@ void random_pickup_jit(const dataset_descriptor_host& num_queries); // Get the device descriptor pointer + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] random_pickup About to call dev_ptr()"); const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] random_pickup dev_ptr() returned: %p", + static_cast(dev_desc)); // Cast size_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly @@ -137,35 +221,118 @@ void compute_distance_to_child_nodes_jit( SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream) { + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes_jit - is_vpq=%d, metric=%d, " + "team_size=%u, " 
+ "dataset_block_dim=%u, pq_bits=%u, pq_len=%u, query_ptr=%p", + dataset_desc.is_vpq, + static_cast(dataset_desc.metric), + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.pq_bits, + dataset_desc.pq_len, + static_cast(query_ptr)); + // Create planner with tags using DataTag = decltype(get_data_type_tag()); using IndexTag = decltype(get_index_type_tag()); using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - // Create planner and register device functions - CagraMultiKernelSearchPlanner planner( - dataset_desc.metric, - "compute_distance_to_child_nodes_kernel", - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - planner.add_setup_workspace_device_function(dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - planner.add_compute_distance_device_function(dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes - DataTag=%s, IndexTag=%s, DistTag=%s, " + "SourceTag=%s", + typeid(DataTag).name(), + typeid(IndexTag).name(), + typeid(DistTag).name(), + typeid(SourceTag).name()); - auto launcher = planner.get_launcher(); + // Create planner and register device functions + std::shared_ptr launcher; + if (dataset_desc.is_vpq) { + using QueryTag = query_type_tag_vpq_t; + using CodebookTag = codebook_tag_vpq_t; + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes VPQ path - QueryTag=%s, CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraMultiKernelSearchPlanner + planner(dataset_desc.metric, + "compute_distance_to_child_nodes_kernel", + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + 
dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + launcher = planner.get_launcher(); + } else { + using CodebookTag = codebook_tag_standard_t; + if (dataset_desc.metric == cuvs::distance::DistanceType::BitwiseHamming) { + using QueryTag = + query_type_tag_standard_t; + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes Standard path (BitwiseHamming) - " + "QueryTag=%s, CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraMultiKernelSearchPlanner + planner(dataset_desc.metric, + "compute_distance_to_child_nodes_kernel", + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + launcher = planner.get_launcher(); + } else { + using QueryTag = query_type_tag_standard_t; + CagraMultiKernelSearchPlanner + planner(dataset_desc.metric, + "compute_distance_to_child_nodes_kernel", + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + 
dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + launcher = planner.get_launcher(); + } + } const auto block_size = 128; const auto teams_per_block = block_size / dataset_desc.team_size; @@ -173,7 +340,10 @@ void compute_distance_to_child_nodes_jit( num_queries); // Get the device descriptor pointer + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes About to call dev_ptr()"); const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes dev_ptr() returned: %p", + static_cast(dev_desc)); // Dispatch kernel via launcher launcher->dispatch(cuda_stream, @@ -242,14 +412,16 @@ void apply_filter_jit(const SourceIndexT* source_indices_ptr, // Create planner - apply_filter doesn't use dataset_descriptor, so we use dummy values // The kernel name is "apply_filter_kernel" and build_entrypoint_name will handle it specially - CagraMultiKernelSearchPlanner planner( - cuvs::distance::DistanceType::L2Expanded, - "apply_filter_kernel", - 8, - 128, - false, - 0, - 0); // Dummy values, not used by apply_filter + using QueryTag = query_type_tag_standard_t; + using CodebookTag = void; + CagraMultiKernelSearchPlanner + planner(cuvs::distance::DistanceType::L2Expanded, + "apply_filter_kernel", + 8, + 128, + false, + 0, + 0); // Dummy values, not used by apply_filter // Add sample filter device function - determine filter type from template parameter planner.add_sample_filter_device_function(get_sample_filter_name()); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 3b3bacb0ae..8469b5b430 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh 
+++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -574,6 +574,20 @@ void select_and_run_jit( SampleFilterT sample_filter, cudaStream_t stream) { + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] select_and_run_jit - is_vpq=%d, metric=%d, team_size=%u, " + "dataset_block_dim=%u, pq_bits=%u, pq_len=%u, queries_ptr=%p, topk_indices_ptr=%p, " + "topk_distances_ptr=%p", + dataset_desc.is_vpq, + static_cast(dataset_desc.metric), + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.pq_bits, + dataset_desc.pq_len, + static_cast(queries_ptr), + reinterpret_cast(topk_indices_ptr), + static_cast(topk_distances_ptr)); + const SourceIndexT* source_indices_ptr = source_indices.has_value() ? source_indices->data_handle() : nullptr; @@ -620,37 +634,117 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - CagraSearchPlanner planner( - dataset_desc.metric, - topk_by_bitonic_sort, - bitonic_sort_and_merge_multi_warps, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len, - true /* persistent */); - - // Add device functions - planner.add_setup_workspace_device_function(dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - planner.add_compute_distance_device_function(dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - planner.add_sample_filter_device_function(get_sample_filter_name()); - - // Get launcher for persistent kernel - auto launcher = planner.get_launcher(); + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] Persistent kernel - DataTag=%s, IndexTag=%s, DistTag=%s, SourceTag=%s", + typeid(DataTag).name(), + typeid(IndexTag).name(), + typeid(DistTag).name(), + typeid(SourceTag).name()); 
+ + std::shared_ptr launcher; + if (dataset_desc.is_vpq) { + using QueryTag = query_type_tag_vpq_t; + using CodebookTag = codebook_tag_vpq_t; + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] VPQ path - QueryTag=%s, CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraSingleCtaSearchPlanner + planner(dataset_desc.metric, + topk_by_bitonic_sort, + bitonic_sort_and_merge_multi_warps, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len, + true /* persistent */); + + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_sample_filter_device_function(get_sample_filter_name()); + launcher = planner.get_launcher(); + } else { + using CodebookTag = codebook_tag_standard_t; + if (dataset_desc.metric == cuvs::distance::DistanceType::BitwiseHamming) { + using QueryTag = + query_type_tag_standard_t; + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] Standard path (BitwiseHamming) - QueryTag=%s, CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraSingleCtaSearchPlanner + planner(dataset_desc.metric, + topk_by_bitonic_sort, + bitonic_sort_and_merge_multi_warps, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len, + true /* persistent */); + + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + 
dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_sample_filter_device_function(get_sample_filter_name()); + launcher = planner.get_launcher(); + } else { + using QueryTag = + query_type_tag_standard_t; + CagraSingleCtaSearchPlanner + planner(dataset_desc.metric, + topk_by_bitonic_sort, + bitonic_sort_and_merge_multi_warps, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len, + true /* persistent */); + + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_sample_filter_device_function(get_sample_filter_name()); + launcher = planner.get_launcher(); + } + } if (!launcher) { RAFT_FAIL("Failed to get JIT launcher for CAGRA persistent search kernel"); } // Use get_runner pattern similar to non-JIT version + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] About to call get_runner_jit and dev_ptr() for persistent kernel"); + const auto* dev_desc_persistent = dataset_desc.dev_ptr(stream); + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] dev_ptr() for persistent kernel returned: %p", + static_cast(dev_desc_persistent)); get_runner_jit(std::cref(dataset_desc), graph, source_indices_ptr, @@ -673,7 +767,7 @@ void select_and_run_jit( ps.persistent_lifetime, ps.persistent_device_usage, launcher, - dataset_desc.dev_ptr(stream)) // Pass descriptor pointer + dev_desc_persistent) // Pass descriptor pointer ->launch(topk_indices_ptr, topk_distances_ptr, queries_ptr, num_queries, topk); return; } else { @@ -683,37 +777,117 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using 
SourceTag = decltype(get_source_index_type_tag()); - CagraSearchPlanner planner( - dataset_desc.metric, - topk_by_bitonic_sort, - bitonic_sort_and_merge_multi_warps, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - - // Add device functions (tags are determined inside the planner methods) - planner.add_setup_workspace_device_function(dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - planner.add_compute_distance_device_function(dataset_desc.metric, - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.is_vpq, - dataset_desc.pq_bits, - dataset_desc.pq_len); - planner.add_sample_filter_device_function(get_sample_filter_name()); - - // Get launcher - auto launcher = planner.get_launcher(); + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] Regular kernel - DataTag=%s, IndexTag=%s, DistTag=%s, SourceTag=%s", + typeid(DataTag).name(), + typeid(IndexTag).name(), + typeid(DistTag).name(), + typeid(SourceTag).name()); + + std::shared_ptr launcher; + if (dataset_desc.is_vpq) { + using QueryTag = query_type_tag_vpq_t; + using CodebookTag = codebook_tag_vpq_t; + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] VPQ path - QueryTag=%s, CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraSingleCtaSearchPlanner + planner(dataset_desc.metric, + topk_by_bitonic_sort, + bitonic_sort_and_merge_multi_warps, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + 
dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_sample_filter_device_function(get_sample_filter_name()); + launcher = planner.get_launcher(); + } else { + using CodebookTag = codebook_tag_standard_t; + if (dataset_desc.metric == cuvs::distance::DistanceType::BitwiseHamming) { + using QueryTag = + query_type_tag_standard_t; + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] Standard path (BitwiseHamming) - QueryTag=%s, CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraSingleCtaSearchPlanner + planner(dataset_desc.metric, + topk_by_bitonic_sort, + bitonic_sort_and_merge_multi_warps, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_sample_filter_device_function(get_sample_filter_name()); + launcher = planner.get_launcher(); + } else { + using QueryTag = + query_type_tag_standard_t; + RAFT_LOG_INFO( + "[JIT FRAGMENT DEBUG] Standard path (non-BitwiseHamming) - QueryTag=%s, CodebookTag=%s", + typeid(QueryTag).name(), + typeid(CodebookTag).name()); + CagraSingleCtaSearchPlanner + planner(dataset_desc.metric, + topk_by_bitonic_sort, + bitonic_sort_and_merge_multi_warps, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + + planner.add_setup_workspace_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + 
planner.add_compute_distance_device_function(dataset_desc.metric, + dataset_desc.team_size, + dataset_desc.dataset_block_dim, + dataset_desc.is_vpq, + dataset_desc.pq_bits, + dataset_desc.pq_len); + planner.add_sample_filter_device_function(get_sample_filter_name()); + launcher = planner.get_launcher(); + } + } if (!launcher) { RAFT_FAIL("Failed to get JIT launcher for CAGRA search kernel"); } // Get the device descriptor pointer - dev_ptr() initializes it if needed + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] About to call dev_ptr()"); const auto* dev_desc = dataset_desc.dev_ptr(stream); + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] dev_ptr() returned: %p", + static_cast(dev_desc)); // Cast size_t/int64_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly @@ -736,6 +910,7 @@ void select_and_run_jit( smem_size); // Dispatch kernel via launcher + RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] About to dispatch kernel via launcher"); launcher->dispatch( stream, grid, diff --git a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp index be2b6481a7..3085616a23 100644 --- a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp +++ b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp @@ -51,6 +51,36 @@ constexpr auto get_source_index_type_tag() if constexpr (std::is_same_v) { return tag_idx_l{}; } } +template +struct query_type_tag_standard { + using type = std::conditional_t, + tag_uc, + tag_f>; +}; + +template +using query_type_tag_standard_t = typename query_type_tag_standard::type; + +template +using query_type_tag_vpq_t = tag_h; + +template +using query_type_tag_standard_l2_t = + query_type_tag_standard_t; +template +using query_type_tag_standard_inner_product_t = + query_type_tag_standard_t; +template +using query_type_tag_standard_cosine_t = + query_type_tag_standard_t; +template +using query_type_tag_standard_hamming_t = + 
query_type_tag_standard_t; + +using codebook_tag_vpq_t = tag_codebook_half; +using codebook_tag_standard_t = void; + // Helper trait to detect if a type is a bitset_filter (regardless of template parameters) template struct is_bitset_filter : std::false_type {}; diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp index 30c7287430..678274ffd5 100644 --- a/cpp/src/neighbors/detail/cagra/utils.hpp +++ b/cpp/src/neighbors/detail/cagra/utils.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -282,8 +282,10 @@ void copy_with_padding( raft::make_device_mdarray(res, mr, raft::make_extents(src.extent(0), padded_dim)); } if (dst.extent(1) == src.extent(1)) { - raft::copy( - dst.data_handle(), src.data_handle(), src.size(), raft::resource::get_cuda_stream(res)); + auto stream = raft::resource::get_cuda_stream(res); + RAFT_LOG_INFO("[STREAM DEBUG] copy_with_padding using stream: %p", + static_cast(stream)); + raft::copy(dst.data_handle(), src.data_handle(), src.size(), stream); } else { // copy with padding RAFT_CUDA_TRY(cudaMemsetAsync( diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in deleted file mode 100644 index ccef2fd68d..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter.cu.in +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include <@header_file@> - -namespace cuvs::neighbors::ivf_flat::detail { - -// Instantiate the device function template -template __device__ bool sample_filter(int64_t* const* const, uint32_t, uint32_t, uint32_t, uint32_t*, int64_t, int64_t); - -} // namespace cuvs::neighbors::ivf_flat::detail - -#else - -#include -#include "@filter_name@.h" - -__attribute__((__constructor__)) static void register_@filter_name@() -{ - registerAlgorithm( - "@filter_name@", - embedded_@filter_name@, - sizeof(embedded_@filter_name@)); -} - -#endif From bbbfb25d241297c2477080b4d52013e9c977f37d Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 25 Feb 2026 02:55:00 +0000 Subject: [PATCH 126/158] launch correctly --- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 478a4c87c1..ef72a36107 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -41,6 +41,7 @@ void AlgorithmLauncher::call( config.blockDim = block; config.stream = stream; config.dynamicSmemBytes = shared_mem; + config.numAttrs = 0; RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); } @@ -48,11 +49,17 @@ void AlgorithmLauncher::call( void AlgorithmLauncher::call_cooperative( cudaStream_t stream, dim3 grid, dim3 block, std::size_t shared_mem, void** kernel_args) { + cudaLaunchAttribute attribute[1]; + attribute[0].id = cudaLaunchAttributeCooperative; + attribute[0].val.cooperative = 1; + cudaLaunchConfig_t config; config.gridDim = grid; config.blockDim = block; config.stream = stream; config.dynamicSmemBytes = shared_mem; + config.numAttrs = 1; + config.attrs = attribute; RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); } From 22c40fd11aca8265d8ed3f0b4ca683fd57b6e7c7 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 25 Feb 2026 19:00:21 +0000 Subject: [PATCH 
127/158] Use new kernel matrix system --- cpp/CMakeLists.txt | 223 ++++++++++++++++-- .../modules/generate_jit_lto_kernels.cmake | 5 + .../apply_filter_embedded.cpp.in | 26 ++ .../jit_lto_kernels/apply_filter_kernel.cu.in | 24 +- ...l_matrix.json => apply_filter_matrix.json} | 0 .../apply_normalization_standard_cosine.cu.in | 40 ---- ...malization_standard_cosine_embedded.cpp.in | 28 +++ ...normalization_standard_cosine_kernel.cu.in | 16 ++ .../apply_normalization_standard_noop.cu.in | 40 ---- ...ormalization_standard_noop_embedded.cpp.in | 28 +++ ...y_normalization_standard_noop_kernel.cu.in | 16 ++ .../compute_distance_standard.cu.in | 40 ---- .../compute_distance_standard_embedded.cpp.in | 28 +++ .../compute_distance_standard_kernel.cu.in | 16 ++ ...ance_to_child_nodes_kernel_embedded.cpp.in | 27 +++ ...stance_to_child_nodes_kernel_kernel.cu.in} | 25 +- .../compute_distance_vpq.cu.in | 41 ---- .../compute_distance_vpq_embedded.cpp.in | 29 +++ .../compute_distance_vpq_kernel.cu.in | 16 ++ .../cagra/jit_lto_kernels/dist_op.cu.in | 35 --- .../jit_lto_kernels/dist_op_embedded.cpp.in | 25 ++ .../jit_lto_kernels/dist_op_kernel.cu.in | 14 ++ .../random_pickup_kernel_embedded.cpp.in | 27 +++ ...u.in => random_pickup_kernel_kernel.cu.in} | 25 +- .../search_multi_cta_kernel_embedded.cpp.in | 27 +++ ...n => search_multi_cta_kernel_kernel.cu.in} | 25 +- .../search_single_cta_kernel_embedded.cpp.in | 27 +++ ... 
=> search_single_cta_kernel_kernel.cu.in} | 25 +- .../search_single_cta_kernel_matrix.json | 16 +- ...search_single_cta_kernel_p_embedded.cpp.in | 27 +++ ...> search_single_cta_kernel_p_kernel.cu.in} | 25 +- .../search_single_cta_kernel_p_matrix.json | 16 +- .../setup_workspace_standard.cu.in | 39 --- .../setup_workspace_standard_embedded.cpp.in | 28 +++ .../setup_workspace_standard_kernel.cu.in | 15 ++ .../jit_lto_kernels/setup_workspace_vpq.cu.in | 40 ---- .../setup_workspace_vpq_embedded.cpp.in | 29 +++ .../setup_workspace_vpq_kernel.cu.in | 15 ++ .../detail/jit_lto_kernels/filter.cu.in | 33 --- .../jit_lto_kernels/filter_embedded.cpp.in | 4 +- .../jit_lto_kernels/filter_kernel.cu.in | 16 ++ .../jit_lto_kernels/filter_kernel.cu.in | 15 -- .../jit_lto_kernels/filter_matrix.json | 13 +- 43 files changed, 724 insertions(+), 505 deletions(-) create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_embedded.cpp.in rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{apply_filter_kernel_matrix.json => apply_filter_matrix.json} (100%) delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_embedded.cpp.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_kernel.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_embedded.cpp.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_kernel.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_embedded.cpp.in create mode 100644 
cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_kernel.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_embedded.cpp.in rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{compute_distance_to_child_nodes_kernel.cu.in => compute_distance_to_child_nodes_kernel_kernel.cu.in} (54%) delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_embedded.cpp.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_kernel.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_embedded.cpp.in rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{random_pickup_kernel.cu.in => random_pickup_kernel_kernel.cu.in} (53%) create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_embedded.cpp.in rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_multi_cta_kernel.cu.in => search_multi_cta_kernel_kernel.cu.in} (59%) create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_embedded.cpp.in rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_single_cta_kernel.cu.in => search_single_cta_kernel_kernel.cu.in} (53%) create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_embedded.cpp.in rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_single_cta_kernel_p.cu.in => search_single_cta_kernel_p_kernel.cu.in} (53%) delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in create mode 100644 
cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_embedded.cpp.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_kernel.cu.in delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_embedded.cpp.in create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_kernel.cu.in delete mode 100644 cpp/src/neighbors/detail/jit_lto_kernels/filter.cu.in rename cpp/src/neighbors/{ivf_flat => detail}/jit_lto_kernels/filter_embedded.cpp.in (66%) create mode 100644 cpp/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in delete mode 100644 cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_kernel.cu.in diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cd56e9c4d2..00f0df3f59 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -371,69 +371,254 @@ if(NOT BUILD_CPU_ONLY) "$<$:${CUVS_CUDA_FLAGS}>" ) target_compile_features(jit_lto_kernel_usage_requirements INTERFACE cuda_std_20) + target_compile_definitions(jit_lto_kernel_usage_requirements INTERFACE BUILD_KERNEL) target_link_libraries( jit_lto_kernel_usage_requirements INTERFACE rmm::rmm raft::raft CCCL::CCCL ) - block(PROPAGATE interleaved_scan_files metric_files filter_files post_lambda_files) + block(PROPAGATE jit_lto_kernel_files) set(CMAKE_CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE}) generate_jit_lto_kernels( - interleaved_scan_files + jit_lto_kernel_files NAME_FORMAT - "interleaved_scan_capacity_@capacity@_veclen_@veclen@_@ascending_descending@_@compute_norm_name@_data_@type_abbrev@_acc_@acc_abbrev@_idx_@idx_abbrev@" + "ivf_flat_interleaved_scan_capacity_@capacity@_veclen_@veclen@_@ascending_descending@_@compute_norm_name@_data_@type_abbrev@_acc_@acc_abbrev@_idx_@idx_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_matrix.json" 
KERNEL_INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in" EMBEDDED_INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_embedded.cpp.in" - OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/interleaved_scan" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_flat/interleaved_scan" KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements ) generate_jit_lto_kernels( - metric_files - NAME_FORMAT "metric_@metric_name@_veclen_@veclen@_data_@type_abbrev@_acc_@acc_abbrev@" + jit_lto_kernel_files + NAME_FORMAT + "ivf_flat_metric_@metric_name@_veclen_@veclen@_data_@type_abbrev@_acc_@acc_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/metric_matrix.json" KERNEL_INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/metric_kernel.cu.in" EMBEDDED_INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/metric_embedded.cpp.in" - OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/metric" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_flat/metric" KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements ) generate_jit_lto_kernels( - filter_files - NAME_FORMAT "@filter_name@" + jit_lto_kernel_files + NAME_FORMAT "ivf_flat_@filter_name@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json" KERNEL_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/filter_kernel.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in" EMBEDDED_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/filter_embedded.cpp.in" - OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/filter" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/jit_lto_kernels/filter_embedded.cpp.in" + 
OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_flat/filter" KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements ) generate_jit_lto_kernels( - post_lambda_files - NAME_FORMAT "@post_lambda_name@" + jit_lto_kernel_files + NAME_FORMAT "ivf_flat_@post_lambda_name@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_matrix.json" KERNEL_INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_kernel.cu.in" EMBEDDED_INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_embedded.cpp.in" - OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/post_lambda" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_flat/post_lambda" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_setup_workspace_standard_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_data_@data_abbrev@_query_@query_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_embedded.cpp.in" + OUTPUT_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/setup_workspace_standard" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_compute_distance_standard_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_data_@data_abbrev@_query_@query_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_matrix.json" + KERNEL_INPUT_FILE + 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_embedded.cpp.in" + OUTPUT_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/compute_distance_standard" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT "cagra_dist_op_@metric_tag@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/dist_op" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_apply_normalization_standard_noop_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_data_@data_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_embedded.cpp.in" + OUTPUT_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/apply_normalization_standard_noop" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_apply_normalization_standard_cosine_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_data_@data_abbrev@" + MATRIX_JSON_FILE 
+ "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_embedded.cpp.in" + OUTPUT_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/apply_normalization_standard_cosine" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_setup_workspace_vpq_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_data_@data_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/setup_workspace_vpq" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_compute_distance_vpq_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_data_@data_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_embedded.cpp.in" + OUTPUT_DIRECTORY 
"${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/compute_distance_vpq" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_embedded.cpp.in" + OUTPUT_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/search_single_cta_kernel" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_embedded.cpp.in" + OUTPUT_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/search_single_cta_kernel_p" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + 
"cagra_search_multi_cta_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/search_multi_cta_kernel" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_random_pickup_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/random_pickup_kernel" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT + "cagra_compute_distance_to_child_nodes_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json" + KERNEL_INPUT_FILE + 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_embedded.cpp.in" + OUTPUT_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/compute_distance_to_child_nodes_kernel" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT "cagra_appy_filter" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/apply_filter" + KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements + ) + generate_jit_lto_kernels( + jit_lto_kernel_files + NAME_FORMAT "cagra_@filter_name@" + MATRIX_JSON_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/filter_matrix.json" + KERNEL_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in" + EMBEDDED_INPUT_FILE + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/jit_lto_kernels/filter_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/filter" KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements ) endblock() add_library( cuvs_jit_lto_kernels STATIC - ${interleaved_scan_files} - ${metric_files} - ${filter_files} - ${post_lambda_files} + ${jit_lto_kernel_files} src/detail/jit_lto/AlgorithmLauncher.cu src/detail/jit_lto/AlgorithmPlanner.cu src/detail/jit_lto/FragmentDatabase.cu diff --git a/cpp/cmake/modules/generate_jit_lto_kernels.cmake 
b/cpp/cmake/modules/generate_jit_lto_kernels.cmake index 1454bac97e..e27f432b76 100644 --- a/cpp/cmake/modules/generate_jit_lto_kernels.cmake +++ b/cpp/cmake/modules/generate_jit_lto_kernels.cmake @@ -129,6 +129,11 @@ function(generate_jit_lto_kernels source_list_var) find_package(Python3 REQUIRED COMPONENTS Interpreter) if(_JIT_LTO_MATRIX_JSON_FILE) + set_property( + DIRECTORY + PROPERTY CMAKE_CONFIGURE_DEPENDS "${_JIT_LTO_MATRIX_JSON_FILE}" + APPEND + ) compute_matrix_product(matrix_product MATRIX_JSON_FILE "${_JIT_LTO_MATRIX_JSON_FILE}") else() compute_matrix_product(matrix_product MATRIX_JSON_STRING "${_JIT_LTO_MATRIX_JSON_STRING}") diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_embedded.cpp.in new file mode 100644 index 0000000000..92151fad70 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_embedded.cpp.in @@ -0,0 +1,26 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + registerAlgorithm( + "apply_filter_kernel", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in index 57f5c3938b..a5f74938db 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in @@ -1,12 +1,10 @@ /* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. Do not edit manually. -#ifdef BUILD_KERNEL - #include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { @@ -16,23 +14,3 @@ template __global__ void apply_filter_kernel_jit<@index_type@, @distance_type@, const @source_index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, const std::uint32_t, const @index_type@, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search - -#else - -#include -#include -#include "apply_filter_kernel_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_apply_filter_kernel_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() -{ - registerAlgorithm( - "apply_filter_kernel", - embedded_apply_filter_kernel_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@, - sizeof(embedded_apply_filter_kernel_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_matrix.json similarity index 100% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel_matrix.json rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_matrix.json diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in deleted file mode 100644 index 60ecb02203..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine.cu.in +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. 
Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( - @distance_type@, const args_t, @index_type@); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; - registerAlgorithm( - "apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@", - embedded_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_apply_normalization_standard_cosine_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_embedded.cpp.in new file mode 100644 index 0000000000..06506a5bf9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_embedded.cpp.in @@ -0,0 +1,28 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; + registerAlgorithm( + "apply_normalization_standard_cosine_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_kernel.cu.in new file mode 100644 index 0000000000..6535f578cc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_cosine_kernel.cu.in @@ -0,0 +1,16 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include + +namespace cuvs::neighbors::cagra::detail { + +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( + @distance_type@, const args_t, @index_type@); + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in deleted file mode 100644 index ae1fa2fbf0..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop.cu.in +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. 
Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( - @distance_type@, const args_t, @index_type@); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; - registerAlgorithm( - "apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@", - embedded_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_apply_normalization_standard_noop_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_embedded.cpp.in new file mode 100644 index 0000000000..62abe7fb9c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_embedded.cpp.in @@ -0,0 +1,28 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + using QueryTag = cuvs::neighbors::cagra::detail::tag_f; + registerAlgorithm( + "apply_normalization_standard_noop_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_kernel.cu.in new file mode 100644 index 0000000000..3e31c28e69 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_noop_kernel.cu.in @@ -0,0 +1,16 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include + +namespace cuvs::neighbors::cagra::detail { + +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( + @distance_type@, const args_t, @index_type@); + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in deleted file mode 100644 index 46756fe67d..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard.cu.in +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( - const args_t, @index_type@); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_type_abbrev@; - registerAlgorithm( - "compute_distance_standard_t@team_size@_dim@dataset_block_dim@", - embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, - sizeof(embedded_compute_distance_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_embedded.cpp.in new file mode 100644 index 0000000000..56a21c9c3e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_embedded.cpp.in @@ -0,0 +1,28 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_abbrev@; + registerAlgorithm( + "compute_distance_standard_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_kernel.cu.in new file mode 100644 index 0000000000..f1d6d92ad1 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_standard_kernel.cu.in @@ -0,0 +1,16 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include + +namespace cuvs::neighbors::cagra::detail { + +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( + const args_t, @index_type@); + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_embedded.cpp.in new file mode 100644 index 0000000000..cc68659c84 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_embedded.cpp.in @@ -0,0 +1,27 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + registerAlgorithm( + "compute_distance_to_child_nodes_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_kernel.cu.in similarity index 54% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_kernel.cu.in index 0f36470ad2..bdd318944d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_kernel.cu.in @@ -1,12 +1,10 @@ /* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. Do not edit manually. 
-#ifdef BUILD_KERNEL - #include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { @@ -20,24 +18,3 @@ template __global__ void compute_distance_to_child_nodes_kernel_jit<@team_size@, const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search - -#else - -#include -#include -#include "compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_compute_distance_to_child_nodes_kernel@pq_prefix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() -{ - registerAlgorithm( - "compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, - sizeof(embedded_compute_distance_to_child_nodes_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in deleted file mode 100644 index e04e11369b..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq.cu.in +++ 
/dev/null @@ -1,41 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( - const args_t, @index_type@); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - registerAlgorithm( - "compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_compute_distance_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_embedded.cpp.in new file mode 100644 index 0000000000..7e91d9ca29 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_embedded.cpp.in @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + using QueryTag = cuvs::neighbors::cagra::detail::tag_h; + registerAlgorithm( + "compute_distance_vpq_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_kernel.cu.in new file mode 100644 index 0000000000..16913953f7 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_kernel.cu.in @@ -0,0 +1,16 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include + +namespace cuvs::neighbors::cagra::detail { + +using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( + const args_t, @index_type@); + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in deleted file mode 100644 index 406a05df2c..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op.cu.in +++ /dev/null @@ -1,35 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. 
- * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -template __device__ @distance_type@ dist_op<@query_type@, @distance_type@>(@query_type@, @query_type@); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "dist_op_@metric_tag@", - embedded_dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@, - sizeof(embedded_dist_op_@metric_tag@_@query_type_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in new file mode 100644 index 0000000000..4f905e1c3a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in @@ -0,0 +1,25 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) static void register_kernel() +{ + registerAlgorithm( + "dist_op_@metric_tag@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in new file mode 100644 index 0000000000..6b0158d79c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in @@ -0,0 +1,14 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include + +namespace cuvs::neighbors::cagra::detail { + +template __device__ @distance_type@ dist_op<@query_type@, @distance_type@>(@query_type@, @query_type@); + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_embedded.cpp.in new file mode 100644 index 0000000000..7f1c17c3bb --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_embedded.cpp.in @@ -0,0 +1,27 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + registerAlgorithm( + "random_pickup_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_kernel.cu.in similarity index 53% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_kernel.cu.in index eb1c8e1d29..9007fe2cf6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_kernel.cu.in @@ -1,12 +1,10 @@ /* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. Do not edit manually. 
-#ifdef BUILD_KERNEL - #include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { @@ -20,24 +18,3 @@ template __global__ void random_pickup_kernel_jit<@team_size@, @dataset_block_di cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search - -#else - -#include -#include -#include "random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_random_pickup_kernel@pq_prefix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - registerAlgorithm( - "random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, - sizeof(embedded_random_pickup_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_embedded.cpp.in new file mode 100644 index 0000000000..20c1fccd83 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_embedded.cpp.in @@ -0,0 +1,27 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + registerAlgorithm( + "search_multi_cta_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_kernel.cu.in similarity index 59% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_kernel.cu.in index 648d10e977..678010642b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_kernel.cu.in @@ -1,12 +1,10 @@ /* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. Do not edit manually. 
-#ifdef BUILD_KERNEL - // Include helpers first so they're available when the JIT kernel is instantiated #include // For pickup_next_parent and topk_by_bitonic_sort_wrapper_* @@ -23,24 +21,3 @@ template __global__ void search_kernel_jit<@team_size@, @dataset_block_dim@, @pq @index_type@* const, @distance_type@* const, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::multi_cta_search - -#else - -#include -#include -#include "search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_search_multi_cta_kernel@pq_prefix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() -{ - registerAlgorithm( - "search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, - sizeof(embedded_search_multi_cta_kernel@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_embedded.cpp.in new file mode 
100644 index 0000000000..597c12f68f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_embedded.cpp.in @@ -0,0 +1,27 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + registerAlgorithm( + "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_kernel.cu.in similarity index 53% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_kernel.cu.in index e7d314e505..d61b010bd2 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_kernel.cu.in @@ -1,12 +1,10 @@ /* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. Do not edit manually. 
-#ifdef BUILD_KERNEL - #include namespace cuvs::neighbors::cagra::detail::single_cta_search { @@ -20,24 +18,3 @@ template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_b uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search - -#else - -#include -#include -#include "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() -{ - registerAlgorithm( - "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, - 
sizeof(embedded_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json index 889fae2242..3ea5b9c72a 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json @@ -67,21 +67,21 @@ "_topk_by_bitonic": [ { "topk_by_bitonic_sort": "true", - "topk_by_bitonic_sort_str": "true" + "topk_by_bitonic_sort_str": "topk_by_bitonic_sort" }, { "topk_by_bitonic_sort": "false", - "topk_by_bitonic_sort_str": "false" + "topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort" } ], "_bitonic_sort_and_merge_multi_warps": [ { "bitonic_sort_and_merge_multi_warps": "true", - "bitonic_sort_and_merge_multi_warps_str": "true" + "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps" }, { "bitonic_sort_and_merge_multi_warps": "false", - "bitonic_sort_and_merge_multi_warps_str": "false" + "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" } ], "team_size": [ @@ -155,21 +155,21 @@ "_topk_by_bitonic": [ { "topk_by_bitonic_sort": "true", - "topk_by_bitonic_sort_str": "true" + "topk_by_bitonic_sort_str": "topk_by_bitonic_sort" }, { "topk_by_bitonic_sort": "false", - "topk_by_bitonic_sort_str": "false" + "topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort" } ], "_bitonic_sort_and_merge_multi_warps": [ { "bitonic_sort_and_merge_multi_warps": "true", - "bitonic_sort_and_merge_multi_warps_str": "true" + "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps" }, { "bitonic_sort_and_merge_multi_warps": "false", - "bitonic_sort_and_merge_multi_warps_str": "false" +
"bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" } ], "team_size": [ diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_embedded.cpp.in new file mode 100644 index 0000000000..d0041208c1 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_embedded.cpp.in @@ -0,0 +1,27 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + registerAlgorithm( + "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_kernel.cu.in similarity index 53% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_kernel.cu.in index 1cbaddd41d..6178bb23a1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_kernel.cu.in @@ -1,12 +1,10 @@ /* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ // This file is auto-generated. 
Do not edit manually. -#ifdef BUILD_KERNEL - #include namespace cuvs::neighbors::cagra::detail::single_cta_search { @@ -20,24 +18,3 @@ template __global__ void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_so worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search - -#else - -#include -#include -#include "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@src_idx_abbrev@() -{ - registerAlgorithm( - "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@, - 
sizeof(embedded_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@_@src_idx_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json index 889fae2242..284e7ae497 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json @@ -67,21 +67,21 @@ "_topk_by_bitonic": [ { "topk_by_bitonic_sort": "true", - "topk_by_bitonic_sort_str": "true" + "topk_by_bitonic_sort_str": "topk_by_bitonic_sort" }, { "topk_by_bitonic_sort": "false", - "topk_by_bitonic_sort_str": "false" + "topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort" } ], "_bitonic_sort_and_merge_multi_warps": [ { "bitonic_sort_and_merge_multi_warps": "true", - "bitonic_sort_and_merge_multi_warps_str": "true" + "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps" }, { "bitonic_sort_and_merge_multi_warps": "false", - "bitonic_sort_and_merge_multi_warps_str": "false" + "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" } ], "team_size": [ @@ -155,21 +155,21 @@ "_topk_by_bitonic": [ { "topk_by_bitonic_sort": "true", - "topk_by_bitonic_sort_str": "true" + "topk_by_bitonic_sort_str": "topk_by_bitonic_sort" }, { "topk_by_bitonic_sort": "false", - "topk_by_bitonic_sort_str": "false" + "topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort" } ], "_bitonic_sort_and_merge_multi_warps": [ { "bitonic_sort_and_merge_multi_warps": "true", - "bitonic_sort_and_merge_multi_warps_str": "true" + "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps" }, { "bitonic_sort_and_merge_multi_warps": "false", - 
"bitonic_sort_and_merge_multi_warps_str": "false" + "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" } ], "team_size": [ diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in deleted file mode 100644 index a7b75ec067..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard.cu.in +++ /dev/null @@ -1,39 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( - cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_type_abbrev@; - registerAlgorithm( - "setup_workspace_standard_t@team_size@_dim@dataset_block_dim@", - embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@, - 
sizeof(embedded_setup_workspace_standard_t@team_size@_dim@dataset_block_dim@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@_@query_type_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_embedded.cpp.in new file mode 100644 index 0000000000..6931d7c893 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_embedded.cpp.in @@ -0,0 +1,28 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_abbrev@; + registerAlgorithm( + "setup_workspace_standard_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_kernel.cu.in new file mode 100644 index 0000000000..f8d0585099 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_standard_kernel.cu.in @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#include + +namespace cuvs::neighbors::cagra::detail { + +template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, 0, 0, void, @data_type@, @index_type@, @distance_type@, @query_type@>( + cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in deleted file mode 100644 index ac2b791f4a..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq.cu.in +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace cuvs::neighbors::cagra::detail { - -template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( - cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); - -} // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_h; - 
registerAlgorithm( - "setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", - embedded_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@, - sizeof(embedded_setup_workspace_vpq_t@team_size@_dim@dataset_block_dim@_@pq_bits@pq_@pq_len@subd_@type_abbrev@_@idx_abbrev@_@dist_abbrev@)); -} - -#endif diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_embedded.cpp.in new file mode 100644 index 0000000000..2ce4e26b80 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_embedded.cpp.in @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include +#include +#include "@embedded_header_file@" + +using namespace cuvs::neighbors::cagra::detail; + +namespace { + +__attribute__((__constructor__)) void register_kernel() +{ + using QueryTag = cuvs::neighbors::cagra::detail::tag_h; + registerAlgorithm( + "setup_workspace_vpq_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_@pq_bits@pq_@pq_len@subd", + embedded_fatbin, + sizeof(embedded_fatbin)); +} + +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_kernel.cu.in new file mode 100644 index 0000000000..dd252501c5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_vpq_kernel.cu.in @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. 
+ +#include + +namespace cuvs::neighbors::cagra::detail { + +template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, half>( + cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/jit_lto_kernels/filter.cu.in b/cpp/src/neighbors/detail/jit_lto_kernels/filter.cu.in deleted file mode 100644 index b2b5a221b6..0000000000 --- a/cpp/src/neighbors/detail/jit_lto_kernels/filter.cu.in +++ /dev/null @@ -1,33 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) @year@, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. - -#ifdef BUILD_KERNEL - -#include - -namespace @namespace@ { - -// Instantiate the sample_filter device function template -// CAGRA style: sample_filter(query_id, node_id, filter_data) -template __device__ bool sample_filter<@source_index_type@>(uint32_t, @source_index_type@, void*); - -} // namespace @namespace@ - -#else - -#include -#include "@kernel_name_var@.h" - -__attribute__((__constructor__)) static void register_@kernel_name_var@() -{ - registerAlgorithm( - "sample_filter_@kernel_name_var@", - embedded_@kernel_name_var@, - sizeof(embedded_@kernel_name_var@)); -} - -#endif diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_embedded.cpp.in b/cpp/src/neighbors/detail/jit_lto_kernels/filter_embedded.cpp.in similarity index 66% rename from cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_embedded.cpp.in rename to cpp/src/neighbors/detail/jit_lto_kernels/filter_embedded.cpp.in index a5a7299b73..4cd80fdaf0 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_embedded.cpp.in +++ 
b/cpp/src/neighbors/detail/jit_lto_kernels/filter_embedded.cpp.in @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -13,7 +13,7 @@ namespace { __attribute__((__constructor__)) void register_kernel() { registerAlgorithm( - "@filter_name@", + "sample_filter_@filter_name@_source_index_@source_index_abbrev@", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in b/cpp/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in new file mode 100644 index 0000000000..36588babdd --- /dev/null +++ b/cpp/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in @@ -0,0 +1,16 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// This file is auto-generated. Do not edit manually. + +#include + +namespace @namespace@ { + +// Instantiate the sample_filter device function template +// CAGRA style: sample_filter(query_id, node_id, filter_data) +template __device__ bool sample_filter<@source_index_type@>(uint32_t, @source_index_type@, void*); + +} // namespace @namespace@ diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_kernel.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_kernel.cu.in deleted file mode 100644 index 9fb6277528..0000000000 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_kernel.cu.in +++ /dev/null @@ -1,15 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ - -// This file is auto-generated. Do not edit manually. 
- -#include <@header_file@> - -namespace cuvs::neighbors::ivf_flat::detail { - -// Instantiate the device function template -template __device__ bool sample_filter(int64_t* const* const, uint32_t, uint32_t, uint32_t, uint32_t*, int64_t, int64_t); - -} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json index 6ceebe78c3..9b2019d9de 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json @@ -2,11 +2,20 @@ "_filter": [ { "filter_name": "filter_none", - "header_file": "neighbors/ivf_flat/jit_lto_kernels/filter_none.cuh" + "header_file": "neighbors/detail/jit_lto_kernels/filter_none.cuh" }, { "filter_name": "filter_bitset", - "header_file": "neighbors/ivf_flat/jit_lto_kernels/filter_bitset.cuh" + "header_file": "neighbors/detail/jit_lto_kernels/filter_bitset.cuh" } + ], + "_source_index": [ + { + "source_index_type": "int64_t", + "source_index_abbrev": "l" + } + ], + "namespace": [ + "cuvs::neighbors::detail" ] } From d404869ecee5fa29adb8ffc99980d12e9efc64b6 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 25 Feb 2026 19:13:22 +0000 Subject: [PATCH 128/158] remove debug prints --- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 2 +- .../detail/cagra/compute_distance.hpp | 55 ++--------------- .../jit_lto_kernels/cagra_planner_base.hpp | 15 ----- .../search_multi_cta_kernel_launcher_jit.cuh | 37 ----------- .../search_multi_kernel_launcher_jit.cuh | 61 ------------------- .../search_single_cta_kernel_launcher_jit.cuh | 54 ---------------- cpp/src/neighbors/detail/cagra/utils.hpp | 2 - 7 files changed, 6 insertions(+), 220 deletions(-) diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index 4e1e24d6dd..0ae28d3ced 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ 
b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -63,7 +63,7 @@ std::shared_ptr AlgorithmPlanner::get_launcher() log_message += device_function + ","; } log_message.pop_back(); - RAFT_LOG_INFO("%s", log_message.c_str()); + RAFT_LOG_DEBUG("%s", log_message.c_str()); launchers[launch_key] = this->build(); } return launchers[launch_key]; diff --git a/cpp/src/neighbors/detail/cagra/compute_distance.hpp b/cpp/src/neighbors/detail/cagra/compute_distance.hpp index 5309831b2a..2466795514 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/src/neighbors/detail/cagra/compute_distance.hpp @@ -246,32 +246,15 @@ struct dataset_descriptor_host { std::atomic ready; // Not sure if std::holds_alternative is thread-safe std::variant value; - state() : ready{false} - { - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::state() constructor - this=%p", - static_cast(this)); - } - template state(InitF init, size_t size) : ready{false}, value{std::make_tuple(init, size)} { - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] state::state(InitF, size_t) constructor - this=%p, size=%zu", - static_cast(this), - size); } ~state() noexcept { if (std::holds_alternative(value)) { auto& [ptr, stream] = std::get(value); - RAFT_LOG_INFO("[STREAM DEBUG] state::~state() - freeing ptr=%p on stream=%p", - static_cast(ptr), - static_cast(stream)); - // Synchronize the stream before freeing to ensure all kernels using this descriptor have - // completed This prevents use-after-free if kernels are still running when the destructor - // is called - RAFT_CUDA_TRY_NO_THROW(cudaStreamSynchronize(stream)); RAFT_CUDA_TRY_NO_THROW(cudaFreeAsync(ptr, stream)); } } @@ -282,45 +265,17 @@ struct dataset_descriptor_host { if (std::holds_alternative(value)) { auto& [fun, size] = std::get(value); dev_descriptor_t* ptr = nullptr; - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() - allocating %zu bytes", size); RAFT_CUDA_TRY(cudaMallocAsync(&ptr, size, stream)); - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() 
- allocated ptr=%p", - static_cast(ptr)); - try { - fun(ptr, stream); - value = std::make_tuple(ptr, stream); - ready.store(true, std::memory_order_release); - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() - initialization complete, ready=true"); - } catch (...) { - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() - exception caught, freeing ptr=%p", - static_cast(ptr)); - // If fun() throws, free the allocated memory before rethrowing - RAFT_CUDA_TRY_NO_THROW(cudaFreeAsync(ptr, stream)); - throw; - } - } else { - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::eval() - already initialized, skipping"); + fun(ptr, stream); + value = std::make_tuple(ptr, stream); + ready.store(true, std::memory_order_release); } } auto get(rmm::cuda_stream_view stream) -> dev_descriptor_t* { - bool was_ready = ready.load(std::memory_order_acquire); - if (!was_ready) { - eval(stream); - // After eval(), value must be in ready_t state (either from this call or a concurrent one) - // If eval() threw, we won't reach here - was_ready = ready.load(std::memory_order_acquire); - } - // Only access value if we're sure it's in ready_t state - if (!was_ready || !std::holds_alternative(value)) { - RAFT_FAIL("Descriptor state is invalid - eval() must have failed"); - } - auto* ptr = std::get<0>(std::get(value)); - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] state::get() - was_ready=%d, ptr=%p", - was_ready, - static_cast(ptr)); - return ptr; + if (!ready.load(std::memory_order_acquire)) { eval(stream); } + return std::get<0>(std::get(value)); } }; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp index 5f6242287c..b255c17188 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp @@ -41,17 +41,11 @@ struct CagraPlannerBase : AlgorithmPlanner { key += "_dim" + std::to_string(dataset_block_dim); key += 
"_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; if (!params.empty()) { key += "_" + params; } - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] setup_workspace VPQ key: %s (params: %s)", - key.c_str(), - params.c_str()); } else { key += "standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); auto params = make_fragment_key(); if (!params.empty()) { key += "_" + params; } - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] setup_workspace standard key: %s (params: %s)", - key.c_str(), - params.c_str()); } this->device_functions.push_back(key); } @@ -70,18 +64,12 @@ struct CagraPlannerBase : AlgorithmPlanner { key += "_dim" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; if (!params.empty()) { key += "_" + params; } - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] compute_distance VPQ key: %s (params: %s)", - key.c_str(), - params.c_str()); this->device_functions.push_back(key); } else { std::string key = "compute_distance_standard_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); auto params = make_fragment_key(); if (!params.empty()) { key += "_" + params; } - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] compute_distance standard key: %s (params: %s)", - key.c_str(), - params.c_str()); this->device_functions.push_back(key); add_dist_op_device_function(metric); add_normalization_device_function(metric, team_size, dataset_block_dim); @@ -102,7 +90,6 @@ struct CagraPlannerBase : AlgorithmPlanner { auto params = make_fragment_key(); std::string key = "dist_op_" + metric_tag; if (!params.empty()) { key += "_" + params; } - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] dist_op key: %s (params: %s)", key.c_str(), params.c_str()); this->device_functions.push_back(key); } @@ -121,8 +108,6 @@ struct CagraPlannerBase : AlgorithmPlanner { key += "_t" + std::to_string(team_size); key += "_dim" + std::to_string(dataset_block_dim); if (!params.empty()) { key += "_" + 
params; } - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] apply_normalization key: %s (params: %s)", key.c_str(), params.c_str()); this->device_functions.push_back(key); } diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index e2d54f234b..1df791e99e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -60,20 +60,6 @@ void select_and_run_jit( SampleFilterT sample_filter, cudaStream_t stream) { - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] select_and_run_jit (multi_cta) - is_vpq=%d, metric=%d, team_size=%u, " - "dataset_block_dim=%u, pq_bits=%u, pq_len=%u, queries_ptr=%p, topk_indices_ptr=%p, " - "topk_distances_ptr=%p", - dataset_desc.is_vpq, - static_cast(dataset_desc.metric), - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.pq_bits, - dataset_desc.pq_len, - static_cast(queries_ptr), - static_cast(topk_indices_ptr), - static_cast(topk_distances_ptr)); - // Extract bitset data from filter object (if it's a bitset_filter) uint32_t* bitset_ptr = nullptr; SourceIndexT bitset_len = 0; @@ -103,22 +89,12 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] multi_cta - DataTag=%s, IndexTag=%s, DistTag=%s, SourceTag=%s", - typeid(DataTag).name(), - typeid(IndexTag).name(), - typeid(DistTag).name(), - typeid(SourceTag).name()); - // Create planner and register device functions // Pass team_size, dataset_block_dim, and VPQ parameters to match the kernel entrypoint name std::shared_ptr launcher; if (dataset_desc.is_vpq) { using QueryTag = query_type_tag_vpq_t; using CodebookTag = codebook_tag_vpq_t; - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] multi_cta VPQ path - QueryTag=%s, CodebookTag=%s", - typeid(QueryTag).name(), - 
typeid(CodebookTag).name()); CagraMultiCtaSearchPlanner planner(dataset_desc.metric, dataset_desc.team_size, @@ -147,11 +123,6 @@ void select_and_run_jit( if (dataset_desc.metric == cuvs::distance::DistanceType::BitwiseHamming) { using QueryTag = query_type_tag_standard_t; - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] multi_cta Standard path (BitwiseHamming) - QueryTag=%s, " - "CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraMultiCtaSearchPlanner planner(dataset_desc.metric, dataset_desc.team_size, @@ -177,11 +148,6 @@ void select_and_run_jit( launcher = planner.get_launcher(); } else { using QueryTag = query_type_tag_standard_t; - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] multi_cta Standard path (non-BitwiseHamming) - QueryTag=%s, " - "CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraMultiCtaSearchPlanner planner(dataset_desc.metric, dataset_desc.team_size, @@ -237,12 +203,9 @@ void select_and_run_jit( dim3 grid_dims(num_cta_per_query, num_queries, 1); // Get the device descriptor pointer - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] multi_cta About to call dev_ptr()"); const dataset_descriptor_base_t* dev_desc_base = dataset_desc.dev_ptr(stream); const auto* dev_desc = dev_desc_base; - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] multi_cta dev_ptr() returned: %p", - static_cast(dev_desc)); // Note: dataset_desc is passed by const reference, so it stays alive for the duration of this // function The descriptor's state is managed by a shared_ptr internally, so no need to explicitly diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh index 6a913bd7e8..5279165a43 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -49,38 +49,17 @@ void random_pickup_jit(const dataset_descriptor_host& std::uint32_t hash_bitlen, cudaStream_t 
cuda_stream) { - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] random_pickup_jit - is_vpq=%d, metric=%d, team_size=%u, " - "dataset_block_dim=%u, pq_bits=%u, pq_len=%u, queries_ptr=%p", - dataset_desc.is_vpq, - static_cast(dataset_desc.metric), - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.pq_bits, - dataset_desc.pq_len, - static_cast(queries_ptr)); - // Create planner with tags using DataTag = decltype(get_data_type_tag()); using IndexTag = decltype(get_index_type_tag()); using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); // Use IndexT for source - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] random_pickup - DataTag=%s, IndexTag=%s, DistTag=%s, SourceTag=%s", - typeid(DataTag).name(), - typeid(IndexTag).name(), - typeid(DistTag).name(), - typeid(SourceTag).name()); - // Create planner and register device functions std::shared_ptr launcher; if (dataset_desc.is_vpq) { using QueryTag = query_type_tag_vpq_t; using CodebookTag = codebook_tag_vpq_t; - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] random_pickup VPQ path - QueryTag=%s, CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraMultiKernelSearchPlanner planner(dataset_desc.metric, "random_pickup_kernel", @@ -130,11 +109,6 @@ void random_pickup_jit(const dataset_descriptor_host& launcher = planner.get_launcher(); } else { using QueryTag = query_type_tag_standard_t; - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] random_pickup Standard path (non-BitwiseHamming) - QueryTag=%s, " - "CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraMultiKernelSearchPlanner planner(dataset_desc.metric, "random_pickup_kernel", @@ -165,10 +139,7 @@ void random_pickup_jit(const dataset_descriptor_host& num_queries); // Get the device descriptor pointer - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] random_pickup About to call dev_ptr()"); const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] 
random_pickup dev_ptr() returned: %p", - static_cast(dev_desc)); // Cast size_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly @@ -221,41 +192,17 @@ void compute_distance_to_child_nodes_jit( SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream) { - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes_jit - is_vpq=%d, metric=%d, " - "team_size=%u, " - "dataset_block_dim=%u, pq_bits=%u, pq_len=%u, query_ptr=%p", - dataset_desc.is_vpq, - static_cast(dataset_desc.metric), - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.pq_bits, - dataset_desc.pq_len, - static_cast(query_ptr)); - // Create planner with tags using DataTag = decltype(get_data_type_tag()); using IndexTag = decltype(get_index_type_tag()); using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes - DataTag=%s, IndexTag=%s, DistTag=%s, " - "SourceTag=%s", - typeid(DataTag).name(), - typeid(IndexTag).name(), - typeid(DistTag).name(), - typeid(SourceTag).name()); - // Create planner and register device functions std::shared_ptr launcher; if (dataset_desc.is_vpq) { using QueryTag = query_type_tag_vpq_t; using CodebookTag = codebook_tag_vpq_t; - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes VPQ path - QueryTag=%s, CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraMultiKernelSearchPlanner planner(dataset_desc.metric, "compute_distance_to_child_nodes_kernel", @@ -282,11 +229,6 @@ void compute_distance_to_child_nodes_jit( if (dataset_desc.metric == cuvs::distance::DistanceType::BitwiseHamming) { using QueryTag = query_type_tag_standard_t; - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes Standard path (BitwiseHamming) - " - "QueryTag=%s, CodebookTag=%s", - typeid(QueryTag).name(), - 
typeid(CodebookTag).name()); CagraMultiKernelSearchPlanner planner(dataset_desc.metric, "compute_distance_to_child_nodes_kernel", @@ -340,10 +282,7 @@ void compute_distance_to_child_nodes_jit( num_queries); // Get the device descriptor pointer - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes About to call dev_ptr()"); const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] compute_distance_to_child_nodes dev_ptr() returned: %p", - static_cast(dev_desc)); // Dispatch kernel via launcher launcher->dispatch(cuda_stream, diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 8469b5b430..05e4671ebe 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -574,20 +574,6 @@ void select_and_run_jit( SampleFilterT sample_filter, cudaStream_t stream) { - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] select_and_run_jit - is_vpq=%d, metric=%d, team_size=%u, " - "dataset_block_dim=%u, pq_bits=%u, pq_len=%u, queries_ptr=%p, topk_indices_ptr=%p, " - "topk_distances_ptr=%p", - dataset_desc.is_vpq, - static_cast(dataset_desc.metric), - dataset_desc.team_size, - dataset_desc.dataset_block_dim, - dataset_desc.pq_bits, - dataset_desc.pq_len, - static_cast(queries_ptr), - reinterpret_cast(topk_indices_ptr), - static_cast(topk_distances_ptr)); - const SourceIndexT* source_indices_ptr = source_indices.has_value() ? 
source_indices->data_handle() : nullptr; @@ -634,20 +620,10 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] Persistent kernel - DataTag=%s, IndexTag=%s, DistTag=%s, SourceTag=%s", - typeid(DataTag).name(), - typeid(IndexTag).name(), - typeid(DistTag).name(), - typeid(SourceTag).name()); - std::shared_ptr launcher; if (dataset_desc.is_vpq) { using QueryTag = query_type_tag_vpq_t; using CodebookTag = codebook_tag_vpq_t; - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] VPQ path - QueryTag=%s, CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraSingleCtaSearchPlanner planner(dataset_desc.metric, topk_by_bitonic_sort, @@ -678,10 +654,6 @@ void select_and_run_jit( if (dataset_desc.metric == cuvs::distance::DistanceType::BitwiseHamming) { using QueryTag = query_type_tag_standard_t; - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] Standard path (BitwiseHamming) - QueryTag=%s, CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraSingleCtaSearchPlanner planner(dataset_desc.metric, topk_by_bitonic_sort, @@ -740,11 +712,7 @@ void select_and_run_jit( if (!launcher) { RAFT_FAIL("Failed to get JIT launcher for CAGRA persistent search kernel"); } // Use get_runner pattern similar to non-JIT version - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] About to call get_runner_jit and dev_ptr() for persistent kernel"); const auto* dev_desc_persistent = dataset_desc.dev_ptr(stream); - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] dev_ptr() for persistent kernel returned: %p", - static_cast(dev_desc_persistent)); get_runner_jit(std::cref(dataset_desc), graph, source_indices_ptr, @@ -777,20 +745,10 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] Regular kernel - DataTag=%s, IndexTag=%s, DistTag=%s, SourceTag=%s", - 
typeid(DataTag).name(), - typeid(IndexTag).name(), - typeid(DistTag).name(), - typeid(SourceTag).name()); - std::shared_ptr launcher; if (dataset_desc.is_vpq) { using QueryTag = query_type_tag_vpq_t; using CodebookTag = codebook_tag_vpq_t; - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] VPQ path - QueryTag=%s, CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraSingleCtaSearchPlanner planner(dataset_desc.metric, topk_by_bitonic_sort, @@ -820,10 +778,6 @@ void select_and_run_jit( if (dataset_desc.metric == cuvs::distance::DistanceType::BitwiseHamming) { using QueryTag = query_type_tag_standard_t; - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] Standard path (BitwiseHamming) - QueryTag=%s, CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraSingleCtaSearchPlanner planner(dataset_desc.metric, topk_by_bitonic_sort, @@ -851,10 +805,6 @@ void select_and_run_jit( } else { using QueryTag = query_type_tag_standard_t; - RAFT_LOG_INFO( - "[JIT FRAGMENT DEBUG] Standard path (non-BitwiseHamming) - QueryTag=%s, CodebookTag=%s", - typeid(QueryTag).name(), - typeid(CodebookTag).name()); CagraSingleCtaSearchPlanner planner(dataset_desc.metric, topk_by_bitonic_sort, @@ -884,10 +834,7 @@ void select_and_run_jit( if (!launcher) { RAFT_FAIL("Failed to get JIT launcher for CAGRA search kernel"); } // Get the device descriptor pointer - dev_ptr() initializes it if needed - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] About to call dev_ptr()"); const auto* dev_desc = dataset_desc.dev_ptr(stream); - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] dev_ptr() returned: %p", - static_cast(dev_desc)); // Cast size_t/int64_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly @@ -910,7 +857,6 @@ void select_and_run_jit( smem_size); // Dispatch kernel via launcher - RAFT_LOG_INFO("[JIT FRAGMENT DEBUG] About to dispatch kernel via launcher"); launcher->dispatch( stream, grid, diff --git 
a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp index 678274ffd5..f78cdf7bf4 100644 --- a/cpp/src/neighbors/detail/cagra/utils.hpp +++ b/cpp/src/neighbors/detail/cagra/utils.hpp @@ -283,8 +283,6 @@ void copy_with_padding( } if (dst.extent(1) == src.extent(1)) { auto stream = raft::resource::get_cuda_stream(res); - RAFT_LOG_INFO("[STREAM DEBUG] copy_with_padding using stream: %p", - static_cast(stream)); raft::copy(dst.data_handle(), src.data_handle(), src.size(), stream); } else { // copy with padding From 1eef8c5eaa30bc1c12c400098c76be3751cd72f6 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 25 Feb 2026 22:17:09 +0000 Subject: [PATCH 129/158] Remove preprocessor branch --- .../setup_workspace_kernel.cu.in | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in index f39e3ce625..2c684e199c 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in @@ -5,8 +5,6 @@ // This file is auto-generated. Do not edit manually. 
-#ifdef BUILD_KERNEL - #include namespace cuvs::neighbors::cagra::detail { @@ -15,25 +13,3 @@ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@d cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail - -#else - -#include -#include -#include "setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix@.h" - -using namespace cuvs::neighbors::cagra::detail; - -__attribute__((__constructor__)) static void register_setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix_reg@() -{ - using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_type_abbrev@; - registerAlgorithm( - "setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@", - embedded_setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix@, - sizeof(embedded_setup_workspace@pq_prefix@_t@team_size@_dim@dataset_block_dim@@pq_suffix@_@type_abbrev@_@idx_abbrev@_@dist_abbrev@@query_type_suffix@)); -} - -#endif From b2e418bebdd6a6330f70ed865948b079c2f3b84f Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 26 Feb 2026 01:40:19 +0000 Subject: [PATCH 130/158] reconcile pr 1807 and add nvjitlink/nvrtc to jit target --- cpp/CMakeLists.txt | 2 +- .../search_multi_cta_kernel_launcher_jit.cuh | 63 ++++++++-------- .../search_single_cta_kernel_launcher.cuh | 57 ++++++++------- .../search_single_cta_kernel_launcher_jit.cuh | 71 ++++++++++--------- 4 files changed, 104 insertions(+), 89 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 89efbf449a..6ee9b5cd15 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -592,7 +592,7 @@ if(NOT BUILD_CPU_ONLY) PRIVATE 
"${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/src" "${CMAKE_CURRENT_SOURCE_DIR}/../c/include" ) - target_link_libraries(cuvs_jit_lto_kernels PRIVATE raft::raft) + target_link_libraries(cuvs_jit_lto_kernels PRIVATE raft::raft CUDA::nvJitLink CUDA::nvrtc) add_library(cuvs::cuvs_jit_lto_kernels ALIAS cuvs_jit_lto_kernels) endif() diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 1df791e99e..a18e44df51 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -9,6 +9,8 @@ #error "search_multi_cta_kernel_launcher_jit.cuh included but CUVS_ENABLE_JIT_LTO not defined!" #endif +#include "../smem_utils.cuh" + // Include tags header before any other includes that might open namespaces #include @@ -187,9 +189,6 @@ void select_and_run_jit( THROW("Result buffer size %u larger than max buffer size %u", result_buffer_size, 256); } - RAFT_CUDA_TRY(cudaFuncSetAttribute( - launcher->get_kernel(), cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); - // Initialize hash table const uint32_t traversed_hash_size = hashmap::get_size(traversed_hash_bitlen); set_value_batch(traversed_hashmap_ptr, @@ -225,33 +224,37 @@ void select_and_run_jit( const uint32_t max_iterations_u32 = static_cast(ps.max_iterations); const unsigned num_random_samplings_u = static_cast(ps.num_random_samplings); - launcher->dispatch(stream, - grid_dims, - block_dims, - smem_size, - topk_indices_ptr, - topk_distances_ptr, - dev_desc, - queries_ptr, - graph.data_handle(), - max_elements, - graph_degree_u32, // Cast int64_t to uint32_t - source_indices_ptr, - num_random_samplings_u, // Cast uint32_t to unsigned for consistency - ps.rand_xor_mask, // uint64_t matches kernel (8 bytes) - dev_seed_ptr, - num_seeds, - visited_hash_bitlen, - traversed_hashmap_ptr, - 
traversed_hash_bitlen_u32, // Cast int64_t to uint32_t - itopk_size_u32, // Cast size_t to uint32_t - min_iterations_u32, // Cast size_t to uint32_t - max_iterations_u32, // Cast size_t to uint32_t - num_executed_iterations, - query_id_offset, // Offset to add to query_id when calling filter - bitset_ptr, - bitset_len, - original_nbits); + auto kernel_launcher = [&](auto const& kernel) -> void { + launcher->dispatch(stream, + grid_dims, + block_dims, + smem_size, + topk_indices_ptr, + topk_distances_ptr, + dev_desc, + queries_ptr, + graph.data_handle(), + max_elements, + graph_degree_u32, // Cast int64_t to uint32_t + source_indices_ptr, + num_random_samplings_u, // Cast uint32_t to unsigned for consistency + ps.rand_xor_mask, // uint64_t matches kernel (8 bytes) + dev_seed_ptr, + num_seeds, + visited_hash_bitlen, + traversed_hashmap_ptr, + traversed_hash_bitlen_u32, // Cast int64_t to uint32_t + itopk_size_u32, // Cast size_t to uint32_t + min_iterations_u32, // Cast size_t to uint32_t + max_iterations_u32, // Cast size_t to uint32_t + num_executed_iterations, + query_id_offset, // Offset to add to query_id when calling filter + bitset_ptr, + bitset_len, + original_nbits); + }; + cuvs::neighbors::detail::safely_launch_kernel_with_smem_size( + launcher->get_kernel(), smem_size, kernel_launcher); RAFT_CUDA_TRY(cudaPeekAtLastError()); } diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher.cuh index 064b2be28a..4b7cb0a623 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher.cuh @@ -5,6 +5,8 @@ #pragma once +#include "../smem_utils.cuh" + #include "search_single_cta_kernel-inl.cuh" // For search_kernel_config, persistent_runner_t, etc. 
#include "search_single_cta_kernel_launcher_common.cuh" @@ -80,36 +82,39 @@ control is returned in this thread (in persistent_runner_t constructor), so we'r using descriptor_base_type = dataset_descriptor_base_t; auto kernel = search_kernel_config:: choose_itopk_and_mx_candidates(ps.itopk_size, num_itopk_candidates, block_size); - RAFT_CUDA_TRY( - cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + dim3 thread_dims(block_size, 1, 1); dim3 block_dims(1, num_queries, 1); RAFT_LOG_DEBUG( "Launching kernel with %u threads, %u block %u smem", block_size, num_queries, smem_size); - kernel<<>>(topk_indices_ptr, - topk_distances_ptr, - topk, - dataset_desc.dev_ptr(stream), - queries_ptr, - graph.data_handle(), - graph.extent(1), - source_indices_ptr, - ps.num_random_samplings, - ps.rand_xor_mask, - dev_seed_ptr, - num_seeds, - hashmap_ptr, - max_candidates, - max_itopk, - ps.itopk_size, - ps.search_width, - ps.min_iterations, - ps.max_iterations, - num_executed_iterations, - hash_bitlen, - small_hash_bitlen, - small_hash_reset_interval, - sample_filter); + auto const& kernel_launcher = [&](auto const& kernel) -> void { + kernel<<>>(topk_indices_ptr, + topk_distances_ptr, + topk, + dataset_desc.dev_ptr(stream), + queries_ptr, + graph.data_handle(), + graph.extent(1), + source_indices_ptr, + ps.num_random_samplings, + ps.rand_xor_mask, + dev_seed_ptr, + num_seeds, + hashmap_ptr, + max_candidates, + max_itopk, + ps.itopk_size, + ps.search_width, + ps.min_iterations, + ps.max_iterations, + num_executed_iterations, + hash_bitlen, + small_hash_bitlen, + small_hash_reset_interval, + sample_filter); + }; + cuvs::neighbors::detail::safely_launch_kernel_with_smem_size( + kernel, smem_size, kernel_launcher); RAFT_CUDA_TRY(cudaPeekAtLastError()); } } diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 05e4671ebe..336bff5d81 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -9,6 +9,8 @@ #error "search_single_cta_kernel_launcher_jit.cuh included but CUVS_ENABLE_JIT_LTO not defined!" #endif +#include "../smem_utils.cuh" + #include #include @@ -857,38 +859,43 @@ void select_and_run_jit( smem_size); // Dispatch kernel via launcher - launcher->dispatch( - stream, - grid, - block, - smem_size, - topk_indices_ptr, - topk_distances_ptr, - topk, - queries_ptr, - graph.data_handle(), - graph_degree_u32, // Cast int64_t to uint32_t - source_indices_ptr, - num_random_samplings_u, // Cast uint32_t to unsigned for consistency - ps.rand_xor_mask, // uint64_t matches kernel (8 bytes) - dev_seed_ptr, - num_seeds, - hashmap_ptr, - max_candidates, - max_itopk, - itopk_size_u32, // Cast size_t to uint32_t - search_width_u32, // Cast size_t to uint32_t - min_iterations_u32, // Cast size_t to uint32_t - max_iterations_u32, // Cast size_t to uint32_t - num_executed_iterations, - hash_bitlen_u32, // Cast int64_t to uint32_t - small_hash_bitlen_u32, // Cast size_t to uint32_t - small_hash_reset_interval_u32, // Cast size_t to uint32_t - query_id_offset, // Offset to add to query_id when calling filter - dev_desc, // Pass base pointer - kernel expects concrete type but pointer value is same - bitset_ptr, - bitset_len, - original_nbits); + auto kernel_launcher = [&](auto const& kernel) -> void { + launcher->dispatch( + stream, + grid, + block, + smem_size, + topk_indices_ptr, + topk_distances_ptr, + topk, + queries_ptr, + graph.data_handle(), + graph_degree_u32, // Cast int64_t to uint32_t + source_indices_ptr, + num_random_samplings_u, // Cast uint32_t to unsigned for consistency + ps.rand_xor_mask, // uint64_t matches kernel (8 bytes) + dev_seed_ptr, + num_seeds, + hashmap_ptr, + max_candidates, + max_itopk, + itopk_size_u32, // Cast size_t to uint32_t + search_width_u32, // Cast size_t to uint32_t + 
min_iterations_u32, // Cast size_t to uint32_t + max_iterations_u32, // Cast size_t to uint32_t + num_executed_iterations, + hash_bitlen_u32, // Cast int64_t to uint32_t + small_hash_bitlen_u32, // Cast size_t to uint32_t + small_hash_reset_interval_u32, // Cast size_t to uint32_t + query_id_offset, // Offset to add to query_id when calling filter + dev_desc, // Pass base pointer - kernel expects concrete type but pointer value is same + bitset_ptr, + bitset_len, + original_nbits); + }; + + cuvs::neighbors::detail::safely_launch_kernel_with_smem_size( + launcher->get_kernel(), smem_size, kernel_launcher); RAFT_CUDA_TRY(cudaPeekAtLastError()); } From 53195ef38df5675f80bb79000b8a15b3807c1a33 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 26 Feb 2026 15:37:55 +0000 Subject: [PATCH 131/158] Fix ivf flat --- .../ivf_flat/ivf_flat_interleaved_scan_jit.cuh | 8 ++++++-- .../ivf_flat/jit_lto_kernels/filter_matrix.json | 12 +++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index 240bd2c98a..95f605d262 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -93,8 +93,12 @@ constexpr auto get_metric_name() template constexpr auto get_filter_name() { - if constexpr (std::is_same_v) { return "filter_none_l"; } - if constexpr (std::is_same_v) { return "filter_bitset_l"; } + if constexpr (std::is_same_v) { + return "filter_none_source_index_l"; + } + if constexpr (std::is_same_v) { + return "filter_bitset_source_index_l"; + } } template diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json index 9b2019d9de..f82737eb93 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json +++ 
b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json @@ -1,13 +1,7 @@ { - "_filter": [ - { - "filter_name": "filter_none", - "header_file": "neighbors/detail/jit_lto_kernels/filter_none.cuh" - }, - { - "filter_name": "filter_bitset", - "header_file": "neighbors/detail/jit_lto_kernels/filter_bitset.cuh" - } + "filter_name": [ + "filter_none", + "filter_bitset" ], "_source_index": [ { From f589b2651dd1ceb6ba007c9c4bbee041ef97348d Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 26 Feb 2026 17:25:47 +0000 Subject: [PATCH 132/158] Fix kernel names and matrices --- .../jit_lto_kernels/cagra_planner_base.hpp | 26 +++++++++---------- ...distance_to_child_nodes_kernel_matrix.json | 6 +++-- .../cagra/jit_lto_kernels/filter_matrix.json | 12 +++------ .../random_pickup_kernel_matrix.json | 6 +++-- .../search_multi_cta_kernel_matrix.json | 6 +++-- .../search_multi_cta_planner.hpp | 4 +-- .../search_multi_kernel_planner.hpp | 4 +-- .../search_single_cta_kernel_matrix.json | 10 ++++--- .../search_single_cta_kernel_p_matrix.json | 6 +++-- .../search_single_cta_planner.hpp | 13 +++++----- .../detail/cagra/shared_launcher_jit.hpp | 8 +++--- 11 files changed, 53 insertions(+), 48 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp index b255c17188..908a27046d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp @@ -33,17 +33,17 @@ struct CagraPlannerBase : AlgorithmPlanner { uint32_t pq_bits = 0, uint32_t pq_len = 0) { - std::string key = "setup_workspace_"; + std::string key = "setup_workspace"; if (is_vpq) { - key += "vpq_"; + key += "_vpq"; auto params = make_fragment_key(); - key += "t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); + key += "_team_size_" + std::to_string(team_size); + key += 
"_dataset_block_dim_" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; if (!params.empty()) { key += "_" + params; } } else { - key += "standard_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); + key += "_standard_team_size_" + std::to_string(team_size); + key += "_dataset_block_dim_" + std::to_string(dataset_block_dim); auto params = make_fragment_key(); if (!params.empty()) { key += "_" + params; } } @@ -58,16 +58,16 @@ struct CagraPlannerBase : AlgorithmPlanner { uint32_t pq_len = 0) { if (is_vpq) { - std::string key = "compute_distance_vpq_"; + std::string key = "compute_distance_vpq"; auto params = make_fragment_key(); - key += "t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); + key += "_team_size_" + std::to_string(team_size); + key += "_dataset_block_dim_" + std::to_string(dataset_block_dim); key += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; if (!params.empty()) { key += "_" + params; } this->device_functions.push_back(key); } else { - std::string key = "compute_distance_standard_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); + std::string key = "compute_distance_standard_team_size_" + std::to_string(team_size); + key += "_dataset_block_dim_" + std::to_string(dataset_block_dim); auto params = make_fragment_key(); if (!params.empty()) { key += "_" + params; } this->device_functions.push_back(key); @@ -105,8 +105,8 @@ struct CagraPlannerBase : AlgorithmPlanner { } auto params = make_fragment_key(); std::string key = "apply_normalization_standard_" + normalization_type; - key += "_t" + std::to_string(team_size); - key += "_dim" + std::to_string(dataset_block_dim); + key += "_team_size_" + std::to_string(team_size); + key += "_dataset_block_dim_" + std::to_string(dataset_block_dim); if (!params.empty()) { key += "_" + params; } 
this->device_functions.push_back(key); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json index 81c8c1c48f..8254391243 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json @@ -85,7 +85,8 @@ "_codebook": [ { "codebook_type": "void", - "codebook_tag": "" + "codebook_tag": "", + "codebook_comma": "" } ] }, @@ -159,7 +160,8 @@ "_codebook": [ { "codebook_type": "half", - "codebook_tag": ", tag_codebook_half" + "codebook_tag": "tag_codebook_half", + "codebook_comma": ", " } ] } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_matrix.json index c253774432..d83fbe4b76 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/filter_matrix.json @@ -1,13 +1,7 @@ { - "_filter": [ - { - "filter_name": "filter_none", - "header_file": "neighbors/detail/jit_lto_kernels/filter_none.cuh" - }, - { - "filter_name": "filter_bitset", - "header_file": "neighbors/detail/jit_lto_kernels/filter_bitset.cuh" - } + "filter_name": [ + "filter_none", + "filter_bitset" ], "_source_index": [ { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json index 757f519685..48745aeb98 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json @@ -79,7 +79,8 @@ "_codebook": [ { "codebook_type": "void", - "codebook_tag": "" + "codebook_tag": "", + "codebook_comma": "" } ] }, @@ -147,7 +148,8 @@ 
"_codebook": [ { "codebook_type": "half", - "codebook_tag": ", tag_codebook_half" + "codebook_tag": "tag_codebook_half", + "codebook_comma": ", " } ] } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json index 81c8c1c48f..8254391243 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json @@ -85,7 +85,8 @@ "_codebook": [ { "codebook_type": "void", - "codebook_tag": "" + "codebook_tag": "", + "codebook_comma": "" } ] }, @@ -159,7 +160,8 @@ "_codebook": [ { "codebook_type": "half", - "codebook_tag": ", tag_codebook_half" + "codebook_tag": "tag_codebook_half", + "codebook_comma": ", " } ] } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index af01047be5..2388f06c44 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -55,8 +55,8 @@ struct CagraMultiCtaSearchPlanner { std::string name = "search_multi_cta_kernel"; if (is_vpq) { name += "_vpq"; } - name += "_t" + std::to_string(team_size); - name += "_dim" + std::to_string(dataset_block_dim); + name += "_team_size_" + std::to_string(team_size); + name += "_dataset_block_dim_" + std::to_string(dataset_block_dim); if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } return name; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index 84f30e5ba7..f062939993 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -64,8 +64,8 @@ struct CagraMultiKernelSearchPlanner std::string name = kernel_name; if (is_vpq) { name += "_vpq"; } - name += "_t" + std::to_string(team_size); - name += "_dim" + std::to_string(dataset_block_dim); + name += "_team_size_" + std::to_string(team_size); + name += "_dataset_block_dim_" + std::to_string(dataset_block_dim); if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } return name; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json index 3ea5b9c72a..c54ae715c7 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json @@ -77,11 +77,11 @@ "_bitonic_sort_and_merge_multi_warps": [ { "bitonic_sort_and_merge_multi_warps": "true", - "bitonic_sort_and_merge_multi_warps_str": "topk_by_bitonic_sort" + "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps" }, { "bitonic_sort_and_merge_multi_warps": "false", - "bitonic_sort_and_merge_multi_warps_str": "no_topk_by_bitonic_sort" + "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" } ], "team_size": [ @@ -105,7 +105,8 @@ "_codebook": [ { "codebook_type": "void", - "codebook_tag": "" + "codebook_tag": "", + "codebook_comma": "" } ] }, @@ -199,7 +200,8 @@ "_codebook": [ { "codebook_type": "half", - "codebook_tag": ", tag_codebook_half" + "codebook_tag": "tag_codebook_half", + "codebook_comma": ", " } ] } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json index 284e7ae497..c54ae715c7 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json @@ -105,7 +105,8 @@ "_codebook": [ { "codebook_type": "void", - "codebook_tag": "" + "codebook_tag": "", + "codebook_comma": "" } ] }, @@ -199,7 +200,8 @@ "_codebook": [ { "codebook_type": "half", - "codebook_tag": ", tag_codebook_half" + "codebook_tag": "tag_codebook_half", + "codebook_comma": ", " } ] } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index 9cd3b41e73..bfaa4e7f59 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -60,16 +60,15 @@ struct CagraSingleCtaSearchPlanner uint32_t pq_len, bool persistent) { - std::string name = (persistent ? "search_single_cta_kernel_p_" : "search_single_cta_kernel_"); - name += bool_to_string(topk_by_bitonic_sort) + "_"; - name += bool_to_string(bitonic_sort_and_merge_multi_warps) + "_"; - name += "t" + std::to_string(team_size); - name += "_dim" + std::to_string(dataset_block_dim); + std::string name = (persistent ? "search_single_cta_kernel_p" : "search_single_cta_kernel"); + name += std::string(topk_by_bitonic_sort ? "_" : "_no_") + "topk_by_bitonic_sort"; + name += std::string(bitonic_sort_and_merge_multi_warps ? "_" : "_no_") + + "bitonic_sort_and_merge_multi_warps"; + name += "_team_size_" + std::to_string(team_size); + name += "_dataset_block_dim_" + std::to_string(dataset_block_dim); if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } return name; } - - static std::string bool_to_string(bool b) { return b ? 
"true" : "false"; } }; } // namespace single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp index 3085616a23..33dcf9cbf9 100644 --- a/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp +++ b/cpp/src/neighbors/detail/cagra/shared_launcher_jit.hpp @@ -96,7 +96,9 @@ std::string get_sample_filter_name() using DecayedFilter = std::decay_t; // First check for none_sample_filter (the only unwrapped case) - if constexpr (std::is_same_v) { return "filter_none_ui"; } + if constexpr (std::is_same_v) { + return "filter_none_source_index_ui"; + } // All other filters are wrapped in CagraSampleFilterWithQueryIdOffset // Access the inner filter type via decltype @@ -105,12 +107,12 @@ std::string get_sample_filter_name() if constexpr (is_bitset_filter::value || std::is_same_v> || std::is_same_v>) { - return "filter_bitset_ui"; + return "filter_bitset_source_index_ui"; } } // Default to none filter for unknown types - return "filter_none_ui"; + return "filter_none_source_index_ui"; } } // namespace cuvs::neighbors::cagra::detail From 6b8d1750be3487b9b9a1789c0c888e3707f43e86 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 26 Feb 2026 19:09:32 +0000 Subject: [PATCH 133/158] Fix query --- .../cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json | 4 ++-- .../jit_lto_kernels/search_single_cta_kernel_matrix.json | 4 ++-- .../jit_lto_kernels/search_single_cta_kernel_p_matrix.json | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json index 8254391243..929165330b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json @@ -111,8 +111,8 @@ ], "_query": [ { - "query_type": "float", 
- "query_abbrev": "f" + "query_type": "half", + "query_abbrev": "h" } ], "_source_index": [ diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json index c54ae715c7..d9f3e97653 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json @@ -131,8 +131,8 @@ ], "_query": [ { - "query_type": "float", - "query_abbrev": "f" + "query_type": "half", + "query_abbrev": "h" } ], "_source_index": [ diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json index c54ae715c7..d9f3e97653 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json @@ -131,8 +131,8 @@ ], "_query": [ { - "query_type": "float", - "query_abbrev": "f" + "query_type": "half", + "query_abbrev": "h" } ], "_source_index": [ From 426625e36129fca7a8cfc941baf1b79d61dcc2b8 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 26 Feb 2026 19:12:48 +0000 Subject: [PATCH 134/158] Fix another query --- .../compute_distance_to_child_nodes_kernel_matrix.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json index 8254391243..929165330b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json @@ -111,8 +111,8 @@ ], "_query": [ { - 
"query_type": "float", - "query_abbrev": "f" + "query_type": "half", + "query_abbrev": "h" } ], "_source_index": [ From 97dfa18e9ae73fe0db6881b6401522577b4ce58f Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 26 Feb 2026 19:15:07 +0000 Subject: [PATCH 135/158] More --- .../cagra/jit_lto_kernels/random_pickup_kernel_matrix.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json index 48745aeb98..3c014f8580 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json @@ -105,8 +105,8 @@ ], "_query": [ { - "query_type": "float", - "query_abbrev": "f" + "query_type": "half", + "query_abbrev": "h" } ], "_index": [ From 29881c888498d690f463fd4209977225985d9c87 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 26 Feb 2026 21:48:13 +0000 Subject: [PATCH 136/158] Make naming and matrices more consistent --- cpp/CMakeLists.txt | 52 +++++++++---------- .../jit_lto_kernels/apply_filter_kernel.cu.in | 2 +- .../compute_distance_embedded.cpp.in | 2 +- .../compute_distance_kernel.cu.in | 2 +- .../compute_distance_matrix.json | 16 +++--- ...e_distance_to_child_nodes_embedded.cpp.in} | 2 +- ...pute_distance_to_child_nodes_kernel.cu.in} | 2 +- ...mpute_distance_to_child_nodes_matrix.json} | 0 .../jit_lto_kernels/dist_op_embedded.cpp.in | 2 +- .../cagra/jit_lto_kernels/dist_op_matrix.json | 6 +-- ...d.cpp.in => random_pickup_embedded.cpp.in} | 2 +- ...ernel.cu.in => random_pickup_kernel.cu.in} | 2 +- ..._matrix.json => random_pickup_matrix.json} | 0 ...pp.in => search_multi_cta_embedded.cpp.in} | 2 +- ...ernel_jit.cuh => search_multi_cta_jit.cuh} | 0 ...el.cu.in => search_multi_cta_kernel.cu.in} | 2 +- ...trix.json => search_multi_cta_matrix.json} | 0 
.../search_multi_cta_planner.hpp | 2 +- ...ti_kernel_jit.cuh => search_multi_jit.cuh} | 0 ...p.in => search_single_cta_embedded.cpp.in} | 2 +- ...rnel_jit.cuh => search_single_cta_jit.cuh} | 0 ...l.cu.in => search_single_cta_kernel.cu.in} | 2 +- ...rix.json => search_single_cta_matrix.json} | 0 ...in => search_single_cta_p_embedded.cpp.in} | 2 +- ...cu.in => search_single_cta_p_kernel.cu.in} | 2 +- ...x.json => search_single_cta_p_matrix.json} | 0 .../search_single_cta_planner.hpp | 2 +- .../search_multi_kernel_launcher_jit.cuh | 12 ++--- 28 files changed, 57 insertions(+), 61 deletions(-) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{compute_distance_to_child_nodes_kernel_embedded.cpp.in => compute_distance_to_child_nodes_embedded.cpp.in} (84%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{compute_distance_to_child_nodes_kernel_kernel.cu.in => compute_distance_to_child_nodes_kernel.cu.in} (93%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{compute_distance_to_child_nodes_kernel_matrix.json => compute_distance_to_child_nodes_matrix.json} (100%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{random_pickup_kernel_embedded.cpp.in => random_pickup_embedded.cpp.in} (86%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{random_pickup_kernel_kernel.cu.in => random_pickup_kernel.cu.in} (91%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{random_pickup_kernel_matrix.json => random_pickup_matrix.json} (100%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_multi_cta_kernel_embedded.cpp.in => search_multi_cta_embedded.cpp.in} (85%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_multi_cta_kernel_jit.cuh => search_multi_cta_jit.cuh} (100%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_multi_cta_kernel_kernel.cu.in => search_multi_cta_kernel.cu.in} (98%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_multi_cta_kernel_matrix.json => search_multi_cta_matrix.json} (100%) 
rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_multi_kernel_jit.cuh => search_multi_jit.cuh} (100%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_single_cta_kernel_embedded.cpp.in => search_single_cta_embedded.cpp.in} (80%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_single_cta_kernel_jit.cuh => search_single_cta_jit.cuh} (100%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_single_cta_kernel_kernel.cu.in => search_single_cta_kernel.cu.in} (98%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_single_cta_kernel_matrix.json => search_single_cta_matrix.json} (100%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_single_cta_kernel_p_embedded.cpp.in => search_single_cta_p_embedded.cpp.in} (79%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_single_cta_kernel_p_kernel.cu.in => search_single_cta_p_kernel.cu.in} (98%) rename cpp/src/neighbors/detail/cagra/jit_lto_kernels/{search_single_cta_kernel_p_matrix.json => search_single_cta_p_matrix.json} (100%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6ee9b5cd15..5c110c394f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -483,69 +483,67 @@ if(NOT BUILD_CPU_ONLY) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + "cagra_search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json" KERNEL_INPUT_FILE - 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_kernel.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in" EMBEDDED_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_embedded.cpp.in" - OUTPUT_DIRECTORY - "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/search_single_cta_kernel" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/search_single_cta" KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements ) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + "cagra_search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json" KERNEL_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_kernel.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in" EMBEDDED_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_embedded.cpp.in" - OUTPUT_DIRECTORY - "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/search_single_cta_kernel_p" + 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/search_single_cta_p" KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements ) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_search_multi_cta_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + "cagra_search_multi_cta@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json" KERNEL_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_kernel.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in" EMBEDDED_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_embedded.cpp.in" - OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/search_multi_cta_kernel" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/search_multi_cta" KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements ) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_random_pickup_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + "cagra_random_pickup@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE - 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json" KERNEL_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_kernel.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in" EMBEDDED_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_embedded.cpp.in" - OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/random_pickup_kernel" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in" + OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/random_pickup" KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements ) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_compute_distance_to_child_nodes_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + "cagra_compute_distance_to_child_nodes@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json" KERNEL_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_kernel.cu.in" + "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in" EMBEDDED_INPUT_FILE - "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_embedded.cpp.in" + 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in" OUTPUT_DIRECTORY - "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/compute_distance_to_child_nodes_kernel" + "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/cagra/compute_distance_to_child_nodes" KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements ) generate_jit_lto_kernels( diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in index ec1584e035..121eb91ee9 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in @@ -5,7 +5,7 @@ // This file is auto-generated. Do not edit manually. -#include +#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_embedded.cpp.in index dbe385de2f..e9647bef04 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_embedded.cpp.in @@ -19,7 +19,7 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( + QueryTag@codebook_comma@ @codebook_tag@>( "compute_distance@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, sizeof(embedded_fatbin)); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in index 3fe7d58aec..b9da6b20e2 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in @@ -5,7 +5,7 @@ // This file is auto-generated. 
Do not edit manually. -#include +#include namespace cuvs::neighbors::cagra::detail { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_matrix.json index c863fbec27..39cf9ad2c5 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_matrix.json @@ -70,20 +70,19 @@ ], "_pq": [ { - "pq_prefix": "_standard", - "pq_suffix": "", + "pq_len": "0", "pq_bits": "0", - "pq_len": "0" + "pq_prefix": "_standard", + "pq_suffix": "" } ], "_codebook": [ { "codebook_type": "void", "codebook_tag": "", - "codebook_tag_comma": "" + "codebook_comma": "" } - ], - "impl_file": "compute_distance_standard_impl.cuh" + ] }, { "_data": [ @@ -150,9 +149,8 @@ { "codebook_type": "half", "codebook_tag": "tag_codebook_half", - "codebook_tag_comma": ", " + "codebook_comma": ", " } - ], - "impl_file": "compute_distance_vpq_impl.cuh" + ] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in similarity index 84% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_embedded.cpp.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in index 4be07927cb..7b8fcf0b31 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in @@ -19,7 +19,7 @@ __attribute__((__constructor__)) void register_kernel() tag_idx_@index_abbrev@, tag_dist_@distance_abbrev@, tag_idx_@source_index_abbrev@@codebook_comma@ @codebook_tag@>( - 
"compute_distance_to_child_nodes_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + "compute_distance_to_child_nodes@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in similarity index 93% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_kernel.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index 09f874ab76..6da7c92d1d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -5,7 +5,7 @@ // This file is auto-generated. Do not edit manually. 
-#include +#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json similarity index 100% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel_matrix.json rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in index 4f905e1c3a..fa859dc626 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in @@ -15,7 +15,7 @@ namespace { __attribute__((__constructor__)) static void register_kernel() { - registerAlgorithm( "dist_op_@metric_tag@", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json index 174a917811..7f0772ab1f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json @@ -3,17 +3,17 @@ { "metric_tag": "l2", "query_type": "float", - "query_type_abbrev": "f" + "query_abbrev": "f" }, { "metric_tag": "inner_product", "query_type": "float", - "query_type_abbrev": "f" + "query_abbrev": "f" }, { "metric_tag": "hamming", "query_type": "uint8_t", - "query_type_abbrev": "uc" + "query_abbrev": "uc" } ], "_distance": [ diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in similarity index 86% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_embedded.cpp.in 
rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in index 8fd7bfe083..19cbfb1527 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in @@ -19,7 +19,7 @@ __attribute__((__constructor__)) void register_kernel() tag_idx_@index_abbrev@, tag_dist_@distance_abbrev@, tag_idx_@index_abbrev@@codebook_comma@ @codebook_tag@>( - "random_pickup_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + "random_pickup@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in similarity index 91% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_kernel.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in index c64de31628..4b7359b6a0 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -5,7 +5,7 @@ // This file is auto-generated. Do not edit manually. 
-#include +#include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json similarity index 100% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel_matrix.json rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in similarity index 85% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_embedded.cpp.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in index 40f7bad9dd..df37bdd0d3 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in @@ -19,7 +19,7 @@ __attribute__((__constructor__)) void register_kernel() tag_idx_@index_abbrev@, tag_dist_@distance_abbrev@, tag_idx_@source_index_abbrev@@codebook_comma@ @codebook_tag@>( - "search_multi_cta_kernel@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + "search_multi_cta@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh similarity index 100% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_jit.cuh rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_kernel.cu.in 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in similarity index 98% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_kernel.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index 97db64de5d..37663545ff 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -6,7 +6,7 @@ // This file is auto-generated. Do not edit manually. #include -#include +#include namespace cuvs::neighbors::cagra::detail::multi_cta_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json similarity index 100% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel_matrix.json rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index 2388f06c44..0526baab81 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -53,7 +53,7 @@ struct CagraMultiCtaSearchPlanner uint32_t pq_bits, uint32_t pq_len) { - std::string name = "search_multi_cta_kernel"; + std::string name = "search_multi_cta"; if (is_vpq) { name += "_vpq"; } name += "_team_size_" + std::to_string(team_size); name += "_dataset_block_dim_" + std::to_string(dataset_block_dim); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh similarity index 100% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_jit.cuh rename 
to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in similarity index 80% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_embedded.cpp.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in index 9e31c2c776..4756969487 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in @@ -19,7 +19,7 @@ __attribute__((__constructor__)) void register_kernel() tag_idx_@index_abbrev@, tag_dist_@distance_abbrev@, tag_idx_@source_index_abbrev@@codebook_comma@ @codebook_tag@>( - "search_single_cta_kernel_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + "search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh similarity index 100% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_jit.cuh rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in similarity index 98% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_kernel.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in 
index 084b942c5b..0b21733771 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -5,7 +5,7 @@ // This file is auto-generated. Do not edit manually. -#include +#include namespace cuvs::neighbors::cagra::detail::single_cta_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json similarity index 100% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_matrix.json rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in similarity index 79% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_embedded.cpp.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in index fcac35e849..cbcffa10ae 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in @@ -19,7 +19,7 @@ __attribute__((__constructor__)) void register_kernel() tag_idx_@index_abbrev@, tag_dist_@distance_abbrev@, tag_idx_@source_index_abbrev@@codebook_comma@ @codebook_tag@>( - "search_single_cta_kernel_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + "search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in similarity index 98% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_kernel.cu.in rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in index a5fad163f6..b0b02e893b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in @@ -5,7 +5,7 @@ // This file is auto-generated. Do not edit manually. -#include +#include namespace cuvs::neighbors::cagra::detail::single_cta_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json similarity index 100% rename from cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel_p_matrix.json rename to cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index bfaa4e7f59..3578e87538 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -60,7 +60,7 @@ struct CagraSingleCtaSearchPlanner uint32_t pq_len, bool persistent) { - std::string name = (persistent ? "search_single_cta_kernel_p" : "search_single_cta_kernel"); + std::string name = (persistent ? "search_single_cta_p" : "search_single_cta"); name += std::string(topk_by_bitonic_sort ? "_" : "_no_") + "topk_by_bitonic_sort"; name += std::string(bitonic_sort_and_merge_multi_warps ? 
"_" : "_no_") + "bitonic_sort_and_merge_multi_warps"; diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh index 5279165a43..c3f13d07c3 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -62,7 +62,7 @@ void random_pickup_jit(const dataset_descriptor_host& using CodebookTag = codebook_tag_vpq_t; CagraMultiKernelSearchPlanner planner(dataset_desc.metric, - "random_pickup_kernel", + "random_pickup", dataset_desc.team_size, dataset_desc.dataset_block_dim, dataset_desc.is_vpq, @@ -88,7 +88,7 @@ void random_pickup_jit(const dataset_descriptor_host& query_type_tag_standard_t; CagraMultiKernelSearchPlanner planner(dataset_desc.metric, - "random_pickup_kernel", + "random_pickup", dataset_desc.team_size, dataset_desc.dataset_block_dim, dataset_desc.is_vpq, @@ -111,7 +111,7 @@ void random_pickup_jit(const dataset_descriptor_host& using QueryTag = query_type_tag_standard_t; CagraMultiKernelSearchPlanner planner(dataset_desc.metric, - "random_pickup_kernel", + "random_pickup", dataset_desc.team_size, dataset_desc.dataset_block_dim, dataset_desc.is_vpq, @@ -205,7 +205,7 @@ void compute_distance_to_child_nodes_jit( using CodebookTag = codebook_tag_vpq_t; CagraMultiKernelSearchPlanner planner(dataset_desc.metric, - "compute_distance_to_child_nodes_kernel", + "compute_distance_to_child_nodes", dataset_desc.team_size, dataset_desc.dataset_block_dim, dataset_desc.is_vpq, @@ -231,7 +231,7 @@ void compute_distance_to_child_nodes_jit( query_type_tag_standard_t; CagraMultiKernelSearchPlanner planner(dataset_desc.metric, - "compute_distance_to_child_nodes_kernel", + "compute_distance_to_child_nodes", dataset_desc.team_size, dataset_desc.dataset_block_dim, dataset_desc.is_vpq, @@ -254,7 +254,7 @@ void compute_distance_to_child_nodes_jit( using QueryTag = query_type_tag_standard_t; 
CagraMultiKernelSearchPlanner planner(dataset_desc.metric, - "compute_distance_to_child_nodes_kernel", + "compute_distance_to_child_nodes", dataset_desc.team_size, dataset_desc.dataset_block_dim, dataset_desc.is_vpq, From bb01ec6cb58181a214b272fd615b0b173229931e Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 26 Feb 2026 22:09:52 +0000 Subject: [PATCH 137/158] add func specialization for smem launcher --- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 1 + .../search_multi_cta_planner.hpp | 9 +---- .../search_multi_kernel_planner.hpp | 10 +---- cpp/src/neighbors/detail/smem_utils.cuh | 38 +++++++++++++++++++ 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index ef72a36107..43666c514e 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -42,6 +42,7 @@ void AlgorithmLauncher::call( config.stream = stream; config.dynamicSmemBytes = shared_mem; config.numAttrs = 0; + std::cout << "Launching kernel with shared_mem: " << shared_mem << std::endl; RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index 2388f06c44..e34b9521d2 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -35,17 +35,10 @@ struct CagraMultiCtaSearchPlanner : CagraPlannerBase( build_entrypoint_name(metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len), is_vpq ? 
make_fragment_key() - : make_fragment_key()), - entrypoint_name_( - build_entrypoint_name(metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len)) + : make_fragment_key()) { } - const std::string& get_entrypoint_name() const { return entrypoint_name_; } - - private: - std::string entrypoint_name_; - static std::string build_entrypoint_name(cuvs::distance::DistanceType metric, uint32_t team_size, uint32_t dataset_block_dim, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index f062939993..bd0f5e4324 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -39,19 +39,11 @@ struct CagraMultiKernelSearchPlanner ? make_fragment_key() : (is_vpq ? make_fragment_key() - : make_fragment_key())), - entrypoint_name_(build_entrypoint_name( - kernel_name, metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len)) + : make_fragment_key())) { } - const std::string& get_entrypoint_name() const { return entrypoint_name_; } - - void set_entrypoint_name(const std::string& name) { entrypoint_name_ = name; } - private: - std::string entrypoint_name_; - static std::string build_entrypoint_name(const std::string& kernel_name, cuvs::distance::DistanceType metric, uint32_t team_size, diff --git a/cpp/src/neighbors/detail/smem_utils.cuh b/cpp/src/neighbors/detail/smem_utils.cuh index 41c95c0ccd..3d8f5cb2eb 100644 --- a/cpp/src/neighbors/detail/smem_utils.cuh +++ b/cpp/src/neighbors/detail/smem_utils.cuh @@ -8,7 +8,9 @@ #include #include +#include #include +#include namespace cuvs::neighbors::detail { @@ -26,6 +28,42 @@ namespace cuvs::neighbors::detail { * @param smem_size The size of the dynamic shared memory to be set. * @param launch The kernel launch function/lambda. 
*/ +// Specialization for cudaKernel_t (JIT LTO kernels) - track by kernel pointer +template +void safely_launch_kernel_with_smem_size(cudaKernel_t kernel, + uint32_t smem_size, + KernelLauncherT const& launch) +{ + // For JIT kernels, track by kernel pointer since all cudaKernel_t have the same type + static std::unordered_map> jit_smem_sizes; + + auto& current_smem_size = jit_smem_sizes[kernel]; + auto last_smem_size = current_smem_size.load(std::memory_order_relaxed); + + if (smem_size > last_smem_size) { + static std::mutex jit_mutex; + std::lock_guard guard(jit_mutex); + if (!current_smem_size.compare_exchange_strong( + last_smem_size, smem_size, std::memory_order_relaxed, std::memory_order_relaxed)) { + // The value has been updated by another thread between the load and the mutex acquisition. + if (smem_size > last_smem_size) { + current_smem_size.store(smem_size, std::memory_order_relaxed); + } + } + // Only update if the last seen value is smaller than the new one. + if (smem_size > last_smem_size) { + auto launch_status = + cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size); + RAFT_EXPECTS(launch_status == cudaSuccess, + "Failed to set max dynamic shared memory size to %u bytes", + smem_size); + } + } + + return launch(kernel); +} + +// General template for regular function pointers template void safely_launch_kernel_with_smem_size(KernelT const& kernel, uint32_t smem_size, From 6516f7810608bc426586eca358ccd0dde861dc37 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 27 Feb 2026 00:07:59 +0000 Subject: [PATCH 138/158] fix ivf flat udf key --- cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh index 95f605d262..256e37221a 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh +++ 
b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan_jit.cuh @@ -188,7 +188,7 @@ void launch_kernel(const index& index, std::string metric_name = "metric_udf_" + udf_hash; auto& nvrtc_lto_compiler = nvrtc_compiler(); std::string key = - metric_name + "_" + std::to_string(Veclen) + "_" + + metric_name + "_veclen_" + std::to_string(Veclen) + "_" + make_fragment_key()), decltype(get_acc_type_tag())>(); nvrtc_lto_compiler.compile(key, metric_udf); kernel_planner.template add_metric_device_function()), From d737706342da7e841cb304f70de8097773dec355 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 27 Feb 2026 00:13:25 +0000 Subject: [PATCH 139/158] remove debug --- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 43666c514e..ef72a36107 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -42,7 +42,6 @@ void AlgorithmLauncher::call( config.stream = stream; config.dynamicSmemBytes = shared_mem; config.numAttrs = 0; - std::cout << "Launching kernel with shared_mem: " << shared_mem << std::endl; RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); } From a809041d4cbc6ab5eaa22d595f1789f1744f306d Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 27 Feb 2026 00:14:33 +0000 Subject: [PATCH 140/158] Remove comments and debug statement, fix query, copyright --- .pre-commit-config.yaml | 2 +- cpp/CMakeLists.txt | 2 +- cpp/src/detail/jit_lto/AlgorithmLauncher.cu | 1 - .../apply_filter_embedded.cpp.in | 2 -- .../jit_lto_kernels/apply_filter_kernel.cu.in | 2 -- ...ply_normalization_standard_embedded.cpp.in | 4 +-- .../apply_normalization_standard_kernel.cu.in | 4 +-- .../apply_normalization_standard_matrix.json | 26 ++++++++++++++++--- .../compute_distance_embedded.cpp.in | 2 -- .../compute_distance_kernel.cu.in | 2 -- ...te_distance_to_child_nodes_embedded.cpp.in | 2 
-- ...mpute_distance_to_child_nodes_kernel.cu.in | 2 -- .../jit_lto_kernels/dist_op_embedded.cpp.in | 2 -- .../jit_lto_kernels/dist_op_kernel.cu.in | 2 -- .../random_pickup_embedded.cpp.in | 2 -- .../random_pickup_kernel.cu.in | 2 -- .../search_multi_cta_embedded.cpp.in | 2 -- .../search_multi_cta_kernel.cu.in | 2 -- .../search_single_cta_embedded.cpp.in | 2 -- .../search_single_cta_kernel.cu.in | 2 -- .../search_single_cta_p_embedded.cpp.in | 2 -- .../search_single_cta_p_kernel.cu.in | 2 -- .../setup_workspace_embedded.cpp.in | 2 -- .../setup_workspace_kernel.cu.in | 2 -- .../jit_lto_kernels/filter_embedded.cpp.in | 2 -- .../jit_lto_kernels/filter_kernel.cu.in | 2 -- .../interleaved_scan_embedded.cpp.in | 2 -- .../interleaved_scan_kernel.cu.in | 2 -- .../jit_lto_kernels/metric_embedded.cpp.in | 2 -- .../jit_lto_kernels/metric_kernel.cu.in | 2 -- .../post_lambda_embedded.cpp.in | 2 -- .../jit_lto_kernels/post_lambda_kernel.cu.in | 2 -- 32 files changed, 27 insertions(+), 64 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 28546f8332..fa5bba14fa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -109,7 +109,7 @@ repos: args: [--fix, --spdx] files: | (?x) - [.](cmake|c|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx|rs|java)$| + [.](cmake|c|cpp([.]in)?|cu([.]in)?|cuh|h|hpp|sh|pxd|py|pyx|rs|java)$| CMakeLists[.]txt$| CMakeLists_standalone[.]txt$| meta[.]yaml$| diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5c110c394f..8c33623615 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -469,7 +469,7 @@ if(NOT BUILD_CPU_ONLY) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_apply_normalization_standard@normalization_suffix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_data_@data_abbrev@" + "cagra_apply_normalization_standard@normalization_suffix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE 
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_matrix.json" KERNEL_INPUT_FILE diff --git a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu index 43666c514e..ef72a36107 100644 --- a/cpp/src/detail/jit_lto/AlgorithmLauncher.cu +++ b/cpp/src/detail/jit_lto/AlgorithmLauncher.cu @@ -42,7 +42,6 @@ void AlgorithmLauncher::call( config.stream = stream; config.dynamicSmemBytes = shared_mem; config.numAttrs = 0; - std::cout << "Launching kernel with shared_mem: " << shared_mem << std::endl; RAFT_CUDA_TRY(cudaLaunchKernelExC(&config, kernel, kernel_args)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_embedded.cpp.in index 92151fad70..f60e8cbc82 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in index 121eb91ee9..eae9b5e32d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_filter_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. 
- #include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_embedded.cpp.in index 7a2b463c8b..44412d3c8b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include #include "@embedded_header_file@" @@ -15,7 +13,7 @@ namespace { __attribute__((__constructor__)) void register_kernel() { - using QueryTag = cuvs::neighbors::cagra::detail::tag_f; + using QueryTag = cuvs::neighbors::cagra::detail::tag_@query_abbrev@; registerAlgorithm namespace cuvs::neighbors::cagra::detail { using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; -template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, float>( +template __device__ @distance_type@ apply_normalization_standard<@team_size@, @dataset_block_dim@, @data_type@, @index_type@, @distance_type@, @query_type@>( @distance_type@, const args_t, @index_type@); } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_matrix.json index d5c9cbfffa..077684b5be 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/apply_normalization_standard_matrix.json @@ -39,8 +39,28 @@ "256", "512" ], - "normalization_suffix": [ - "_noop", - "_cosine" + 
"_normalization": [ + { + "normalization_suffix": "_noop", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + }, + { + "query_type": "uint8_t", + "query_abbrev": "uc" + } + ] + }, + { + "normalization_suffix": "_cosine", + "_query": [ + { + "query_type": "float", + "query_abbrev": "f" + } + ] + } ] } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_embedded.cpp.in index e9647bef04..f577c6a6d6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in index b9da6b20e2..6163dce4ac 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include namespace cuvs::neighbors::cagra::detail { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in index 7b8fcf0b31..a3d8ecd28f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. 
- #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index 6da7c92d1d..1a1baed5e7 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in index fa859dc626..a15ab944dd 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in index 6b0158d79c..5e18a3ba4c 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. 
- #include namespace cuvs::neighbors::cagra::detail { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in index 19cbfb1527..eb6f6f3c7b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in index 4b7359b6a0..d5424b780b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include namespace cuvs::neighbors::cagra::detail::multi_kernel_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in index df37bdd0d3..67e1238b05 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. 
- #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index 37663545ff..1501a22ff5 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in index 4756969487..2ccb4d9820 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in index 0b21733771..2785ea46ac 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. 
- #include namespace cuvs::neighbors::cagra::detail::single_cta_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in index cbcffa10ae..5a24781346 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in index b0b02e893b..96e4784101 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include namespace cuvs::neighbors::cagra::detail::single_cta_search { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_embedded.cpp.in index 23469f995e..2b10e81a17 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. 
- #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in index 2c684e199c..7872328efb 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include namespace cuvs::neighbors::cagra::detail { diff --git a/cpp/src/neighbors/detail/jit_lto_kernels/filter_embedded.cpp.in b/cpp/src/neighbors/detail/jit_lto_kernels/filter_embedded.cpp.in index 4cd80fdaf0..3e00b20e9a 100644 --- a/cpp/src/neighbors/detail/jit_lto_kernels/filter_embedded.cpp.in +++ b/cpp/src/neighbors/detail/jit_lto_kernels/filter_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in b/cpp/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in index 36588babdd..7350f6bb58 100644 --- a/cpp/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in +++ b/cpp/src/neighbors/detail/jit_lto_kernels/filter_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include namespace @namespace@ { diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_embedded.cpp.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_embedded.cpp.in index a24fa66e58..9270d254fc 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_embedded.cpp.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. 
- #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in index 63fb7823a6..b199f0e4d8 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include namespace cuvs::neighbors::ivf_flat::detail { diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_embedded.cpp.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_embedded.cpp.in index c21b14346b..a979411143 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_embedded.cpp.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_kernel.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_kernel.cu.in index a67956db58..09dedc2bb2 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_kernel.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/metric_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. 
- #include <@header_file@> namespace cuvs::neighbors::ivf_flat::detail { diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_embedded.cpp.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_embedded.cpp.in index a2e3f1ea03..b3449e8e17 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_embedded.cpp.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_embedded.cpp.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. - #include #include "@embedded_header_file@" diff --git a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_kernel.cu.in b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_kernel.cu.in index 363964dd42..99823843c6 100644 --- a/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_kernel.cu.in +++ b/cpp/src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_kernel.cu.in @@ -3,8 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -// This file is auto-generated. Do not edit manually. 
- #include <@header_file@> namespace cuvs::neighbors::ivf_flat::detail { From 49f999fdd139f79df274f50fb130e9cc0b291002 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 27 Feb 2026 03:20:37 +0000 Subject: [PATCH 141/158] missing query tag --- .../compute_distance_to_child_nodes_embedded.cpp.in | 1 + .../cagra/jit_lto_kernels/random_pickup_embedded.cpp.in | 1 + .../jit_lto_kernels/search_multi_cta_embedded.cpp.in | 1 + .../cagra/jit_lto_kernels/search_multi_cta_planner.hpp | 9 +++++++-- .../jit_lto_kernels/search_multi_kernel_planner.hpp | 9 +++++++-- .../jit_lto_kernels/search_single_cta_embedded.cpp.in | 1 + .../jit_lto_kernels/search_single_cta_p_embedded.cpp.in | 1 + .../cagra/jit_lto_kernels/search_single_cta_planner.hpp | 9 +++++++-- 8 files changed, 26 insertions(+), 6 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in index 7b8fcf0b31..2fb1d6f918 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in @@ -18,6 +18,7 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( "compute_distance_to_child_nodes@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in index 19cbfb1527..23fce72704 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in @@ -18,6 +18,7 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( 
"random_pickup@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in index df37bdd0d3..689428276c 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in @@ -18,6 +18,7 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( "search_multi_cta@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index 629a44396f..ab819d4bef 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -34,8 +34,13 @@ struct CagraMultiCtaSearchPlanner uint32_t pq_len = 0) : CagraPlannerBase( build_entrypoint_name(metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len), - is_vpq ? make_fragment_key() - : make_fragment_key()) + is_vpq ? make_fragment_key() + : make_fragment_key()) { } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index bd0f5e4324..6d1ac46e35 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -38,8 +38,13 @@ struct CagraMultiKernelSearchPlanner (kernel_name == "apply_filter_kernel") ? make_fragment_key() : (is_vpq - ? make_fragment_key() - : make_fragment_key())) + ? 
make_fragment_key() + : make_fragment_key())) { } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in index 4756969487..b121fa36a0 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in @@ -18,6 +18,7 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( "search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in index cbcffa10ae..a943d9aeab 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in @@ -18,6 +18,7 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( "search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index 3578e87538..24da6d42ad 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -44,8 +44,13 @@ struct CagraSingleCtaSearchPlanner pq_bits, pq_len, persistent), - is_vpq ? make_fragment_key() - : make_fragment_key()) + is_vpq ? 
make_fragment_key() + : make_fragment_key()) { } From b52f8c26141a4b775458aa5fe2d3adc500a986a8 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 27 Feb 2026 20:12:20 +0000 Subject: [PATCH 142/158] Refactor and make thread-safe --- cpp/src/neighbors/detail/smem_utils.cuh | 93 ++++++++++++------------- 1 file changed, 45 insertions(+), 48 deletions(-) diff --git a/cpp/src/neighbors/detail/smem_utils.cuh b/cpp/src/neighbors/detail/smem_utils.cuh index 3d8f5cb2eb..8e9cde4ff2 100644 --- a/cpp/src/neighbors/detail/smem_utils.cuh +++ b/cpp/src/neighbors/detail/smem_utils.cuh @@ -14,6 +14,38 @@ namespace cuvs::neighbors::detail { +template +void safely_launch_kernel_with_smem_size_impl(KernelT const& kernel, + uint32_t smem_size, + KernelLauncherT const& launch, + std::mutex& mutex, + std::atomic& current_smem_size) +{ + auto last_smem_size = current_smem_size.load(std::memory_order_relaxed); + if (smem_size > last_smem_size) { + // We still need a mutex for the critical section: actualize last_smem_size and set the + // attribute. + auto guard = std::lock_guard{mutex}; + if (!current_smem_size.compare_exchange_strong( + last_smem_size, smem_size, std::memory_order_relaxed, std::memory_order_relaxed)) { + // The value has been updated by another thread between the load and the mutex acquisition. + if (smem_size > last_smem_size) { + current_smem_size.store(smem_size, std::memory_order_relaxed); + } + } + // Only update if the last seen value is smaller than the new one. + if (smem_size > last_smem_size) { + auto launch_status = + cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size); + RAFT_EXPECTS(launch_status == cudaSuccess, + "Failed to set max dynamic shared memory size to %u bytes", + smem_size); + } + } + // We don't need to guard the kernel launch because the smem_size can only grow. + return launch(kernel); +} + /** * @brief (Thread-)Safely invoke a kernel with a maximum dynamic shared memory size. 
* This is required because the sequence `cudaFuncSetAttribute` + kernel launch is not executed @@ -35,32 +67,17 @@ void safely_launch_kernel_with_smem_size(cudaKernel_t kernel, KernelLauncherT const& launch) { // For JIT kernels, track by kernel pointer since all cudaKernel_t have the same type - static std::unordered_map> jit_smem_sizes; + static std::unordered_map>> + jit_smem_sizes; + std::mutex map_mutex; - auto& current_smem_size = jit_smem_sizes[kernel]; - auto last_smem_size = current_smem_size.load(std::memory_order_relaxed); - - if (smem_size > last_smem_size) { - static std::mutex jit_mutex; - std::lock_guard guard(jit_mutex); - if (!current_smem_size.compare_exchange_strong( - last_smem_size, smem_size, std::memory_order_relaxed, std::memory_order_relaxed)) { - // The value has been updated by another thread between the load and the mutex acquisition. - if (smem_size > last_smem_size) { - current_smem_size.store(smem_size, std::memory_order_relaxed); - } - } - // Only update if the last seen value is smaller than the new one. - if (smem_size > last_smem_size) { - auto launch_status = - cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size); - RAFT_EXPECTS(launch_status == cudaSuccess, - "Failed to set max dynamic shared memory size to %u bytes", - smem_size); - } + std::pair>* current_smem_size; + { + std::lock_guard map_lock{map_mutex}; + current_smem_size = &jit_smem_sizes[kernel]; } - - return launch(kernel); + safely_launch_kernel_with_smem_size_impl( + kernel, smem_size, launch, current_smem_size->first, current_smem_size->second); } // General template for regular function pointers @@ -71,30 +88,10 @@ void safely_launch_kernel_with_smem_size(KernelT const& kernel, { // the last smem size is parameterized by the kernel thanks to the template parameter. 
static std::atomic current_smem_size{0}; - auto last_smem_size = current_smem_size.load(std::memory_order_relaxed); - if (smem_size > last_smem_size) { - // We still need a mutex for the critical section: actualize last_smem_size and set the - // attribute. - static auto mutex = std::mutex{}; - auto guard = std::lock_guard{mutex}; - if (!current_smem_size.compare_exchange_strong( - last_smem_size, smem_size, std::memory_order_relaxed, std::memory_order_relaxed)) { - // The value has been updated by another thread between the load and the mutex acquisition. - if (smem_size > last_smem_size) { - current_smem_size.store(smem_size, std::memory_order_relaxed); - } - } - // Only update if the last seen value is smaller than the new one. - if (smem_size > last_smem_size) { - auto launch_status = - cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size); - RAFT_EXPECTS(launch_status == cudaSuccess, - "Failed to set max dynamic shared memory size to %u bytes", - smem_size); - } - } - // We don't need to guard the kernel launch because the smem_size can only grow. 
- return launch(kernel); + static std::mutex mutex; + + safely_launch_kernel_with_smem_size_impl( + kernel, smem_size, launch, mutex, current_smem_size); } } // namespace cuvs::neighbors::detail From 0349746cfdfc02071e85b68f3fc0a6f820887381 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 27 Feb 2026 22:50:49 +0000 Subject: [PATCH 143/158] remove prints --- cpp/src/neighbors/detail/cagra/device_common.hpp | 15 +-------------- .../detail/cagra/search_multi_cta_kernel-inl.cuh | 10 ---------- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/device_common.hpp b/cpp/src/neighbors/detail/cagra/device_common.hpp index c1bfa5f558..1cc7772476 100644 --- a/cpp/src/neighbors/detail/cagra/device_common.hpp +++ b/cpp/src/neighbors/detail/cagra/device_common.hpp @@ -150,13 +150,6 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes( } result_distances_ptr[i] = best_norm2_team_local; result_indices_ptr[i] = best_index_team_local; - // Debug: print first few random node distances - if (i < 3 && block_id == 0) { - printf("NON-JIT random: i=%u idx=%u dist=%.6f\n", - i, - best_index_team_local, - (float)best_norm2_team_local); - } } } } @@ -240,13 +233,7 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes( __syncwarp(); // Store the distance - if (valid_i && lead_lane) { - result_child_distances_ptr[j] = child_dist; - // Debug: print first few child node distances - if (j < 3 && threadIdx.x < 32) { - printf("NON-JIT child: j=%u idx=%u dist=%.6f\n", j, child_id, (float)child_dist); - } - } + if (valid_i && lead_lane) { result_child_distances_ptr[j] = child_dist; } } } diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 1d6dac7b78..be1a7a1a56 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -461,16 +461,6 @@ 
RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel( if (result_distances_ptr != nullptr) { DISTANCE_T dist = result_distances_buffer[i]; result_distances_ptr[k] = dist; - // Debug: print first query, first CTA, first few results - if (query_id == 0 && cta_id == 0 && j < 5) { - printf("NON-JIT: query=%u cta=%u j=%u i=%u idx=%u dist=%.6f\n", - query_id, - cta_id, - j, - i, - index & ~index_msb_1_mask, - (float)dist); - } } } else { // If it is valid and registered in the traversed hash table but is From 6e07abb336f117547bb5194728d925aa3eb05ad5 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 27 Feb 2026 23:51:17 +0000 Subject: [PATCH 144/158] remove unnecessary includes --- .../jit_lto_kernels/dist_op_cosine_impl.cuh | 4 - .../jit_lto_kernels/dist_op_hamming_impl.cuh | 6 - .../dist_op_inner_product_impl.cuh | 4 - .../jit_lto_kernels/dist_op_kernel.cu.in | 2 + .../cagra/jit_lto_kernels/dist_op_l2_impl.cuh | 4 - .../extern_device_functions.cuh | 19 - .../jit_lto_kernels/search_multi_cta_jit.cuh | 25 +- .../jit_lto_kernels/search_multi_jit.cuh | 17 +- .../search_single_cta_device_helpers.cuh | 663 ++++++++++++++++++ .../jit_lto_kernels/search_single_cta_jit.cuh | 56 +- 10 files changed, 683 insertions(+), 117 deletions(-) create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_device_helpers.cuh diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh index 908fc2600a..ba6c270fa1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_cosine_impl.cuh @@ -5,12 +5,8 @@ #pragma once -#include - namespace cuvs::neighbors::cagra::detail { -// dist_op fragment for CosineExpanded metric (same as InnerProduct) -// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) template __device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) { diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh index 9dfb23001e..cd4ed29ac6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_hamming_impl.cuh @@ -5,17 +5,11 @@ #pragma once -#include -#include - namespace cuvs::neighbors::cagra::detail { -// dist_op fragment for BitwiseHamming metric -// QueryT is uint8_t for BitwiseHamming template __device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) { - // mask the result of xor for the integer promotion const auto v = (a ^ b) & 0xffu; return __popc(v); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh index b02dc566f2..ba6c270fa1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_inner_product_impl.cuh @@ -5,12 +5,8 @@ #pragma once -#include - namespace cuvs::neighbors::cagra::detail { -// dist_op fragment for InnerProduct metric -// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) template __device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in index 5e18a3ba4c..d5d2bb2cfc 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_kernel.cu.in @@ -3,6 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +#include + #include namespace cuvs::neighbors::cagra::detail { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh index 31dac7d2ed..f74b62b4b0 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l2_impl.cuh @@ -5,12 +5,8 @@ #pragma once -#include - namespace cuvs::neighbors::cagra::detail { -// dist_op fragment for L2Expanded metric -// QueryT can be float (for most metrics) or uint8_t (for BitwiseHamming) template __device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh index cf4de6e486..81f5d56f23 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -10,25 +10,6 @@ namespace cuvs::neighbors::cagra::detail { -template -struct standard_dataset_descriptor_t; - -template -struct cagra_q_dataset_descriptor_t; - template // For DistanceType enum -#include // For raft::upper_bound -#include // For raft::round_up_safe +#include #include #include @@ -22,29 +18,16 @@ #include #endif -// Include extern function declarations before namespace so they're available to kernel definitions #include "../../jit_lto_kernels/filter_data.h" -#include "extern_device_functions.cuh" -// Include shared JIT device functions before namespace so they're available to kernel definitions #include "device_common_jit.cuh" +#include "extern_device_functions.cuh" namespace cuvs::neighbors::cagra::detail::multi_cta_search { -// Helper to check if DescriptorT has kPqBits (VPQ descriptor) - use shared version -// Use fully qualified name since it's a template variable -using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v; - -// sample_filter is declared in extern_device_functions.cuh -using cuvs::neighbors::detail::sample_filter; - -// JIT versions of compute_distance_to_random_nodes and compute_distance_to_child_nodes -// are now shared in device_common_jit.cuh - use fully 
qualified names using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit; using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; - -// JIT version of search_kernel - uses dataset_descriptor_base_t* pointer -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT -// Filter is linked separately via JIT LTO, so we use none_sample_filter directly +using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v; +using cuvs::neighbors::detail::sample_filter; template // For DistanceType enum -#include // For raft::upper_bound - #include #include -#include // For std::is_same_v, std::true_type, std::false_type +#include -// Include extern function declarations before namespace so they're available to kernel definitions #include "../../jit_lto_kernels/filter_data.h" #include "extern_device_functions.cuh" namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -// Helper to check if DescriptorT has kPqBits (VPQ descriptor) template struct has_kpq_bits { template @@ -36,8 +29,6 @@ struct has_kpq_bits { template inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; -// JIT version of random_pickup_kernel - uses dataset_descriptor_base_t* pointer -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT template +#include + +#include + +#include +#include +#include +#include + +#include +#include // For uint4 + +namespace cuvs::neighbors::cagra::detail::single_cta_search { + +// Constants for persistent kernels +constexpr size_t kCacheLineBytes = 64; +constexpr uint32_t kMaxJobsNum = 8192; + +// Worker handle for persistent kernels +struct alignas(kCacheLineBytes) worker_handle_t { + using handle_t = uint64_t; + struct value_t { + uint32_t desc_id; + uint32_t query_id; + }; + union data_t { + handle_t handle; + value_t value; + }; + cuda::atomic data; +}; +static_assert(sizeof(worker_handle_t::value_t) == 
sizeof(worker_handle_t::handle_t)); +static_assert( + cuda::atomic::is_always_lock_free); + +constexpr worker_handle_t::handle_t kWaitForWork = std::numeric_limits::max(); +constexpr worker_handle_t::handle_t kNoMoreWork = kWaitForWork - 1; + +// Job descriptor for persistent kernels +template +struct alignas(kCacheLineBytes) job_desc_t { + using index_type = typename DATASET_DESCRIPTOR_T::INDEX_T; + using distance_type = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using data_type = typename DATASET_DESCRIPTOR_T::DATA_T; + // The algorithm input parameters + struct value_t { + uintptr_t result_indices_ptr; // [num_queries, top_k] + distance_type* result_distances_ptr; // [num_queries, top_k] + const data_type* queries_ptr; // [num_queries, dataset_dim] + uint32_t top_k; + uint32_t n_queries; + }; + using blob_elem_type = uint4; + constexpr static inline size_t kBlobSize = + raft::div_rounding_up_safe(sizeof(value_t), sizeof(blob_elem_type)); + // Union facilitates loading the input by a warp in a single request + union input_t { + blob_elem_type blob[kBlobSize]; // NOLINT + value_t value; + } input; + // Last thread triggers this flag. 
+ cuda::atomic completion_flag; +}; + +// Pick up next parent nodes from the internal topk list +template +RAFT_DEVICE_INLINE_FUNCTION void pickup_next_parents(std::uint32_t* const terminate_flag, + INDEX_T* const next_parent_indices, + INDEX_T* const internal_topk_indices, + const std::size_t internal_topk_size, + const std::uint32_t search_width) +{ + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + + for (std::uint32_t i = threadIdx.x; i < search_width; i += 32) { + next_parent_indices[i] = utils::get_max_value(); + } + std::uint32_t itopk_max = internal_topk_size; + if (itopk_max % 32) { itopk_max += 32 - (itopk_max % 32); } + std::uint32_t num_new_parents = 0; + for (std::uint32_t j = threadIdx.x; j < itopk_max; j += 32) { + std::uint32_t jj = j; + if (TOPK_BY_BITONIC_SORT) { jj = device::swizzling(j); } + INDEX_T index; + int new_parent = 0; + if (j < internal_topk_size) { + index = internal_topk_indices[jj]; + if ((index & index_msb_1_mask) == 0) { // check if most significant bit is set + new_parent = 1; + } + } + const std::uint32_t ballot_mask = __ballot_sync(0xffffffff, new_parent); + if (new_parent) { + const auto i = __popc(ballot_mask & ((1 << threadIdx.x) - 1)) + num_new_parents; + if (i < search_width) { + next_parent_indices[i] = jj; + // set most significant bit as used node + internal_topk_indices[jj] |= index_msb_1_mask; + } + } + num_new_parents += __popc(ballot_mask); + if (num_new_parents >= search_width) { break; } + } + if (threadIdx.x == 0 && (num_new_parents == 0)) { *terminate_flag = 1; } +} + +// Helper function for bitonic sort and full +template +RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_and_full( + float* candidate_distances, // [num_candidates] + IdxT* candidate_indices, // [num_candidates] + const std::uint32_t num_candidates, + const std::uint32_t num_itopk) +{ + const unsigned lane_id = threadIdx.x % raft::warp_size(); + const unsigned warp_id = threadIdx.x / raft::warp_size(); + 
static_assert(MAX_CANDIDATES <= 256); + if constexpr (!MULTI_WARPS) { + if (warp_id > 0) { return; } + constexpr unsigned N = (MAX_CANDIDATES + (raft::warp_size() - 1)) / raft::warp_size(); + float key[N]; + IdxT val[N]; + /* Candidates -> Reg */ + for (unsigned i = 0; i < N; i++) { + unsigned j = lane_id + (raft::warp_size() * i); + if (j < num_candidates) { + key[i] = candidate_distances[j]; + val[i] = candidate_indices[j]; + } else { + key[i] = utils::get_max_value(); + val[i] = utils::get_max_value(); + } + } + /* Sort */ + bitonic::warp_sort(key, val); + /* Reg -> Temp_itopk */ + for (unsigned i = 0; i < N; i++) { + unsigned j = (N * lane_id) + i; + if (j < num_candidates && j < num_itopk) { + candidate_distances[device::swizzling(j)] = key[i]; + candidate_indices[device::swizzling(j)] = val[i]; + } + } + } else { + assert(blockDim.x >= 64); + // Use two warps (64 threads) + constexpr unsigned max_candidates_per_warp = (MAX_CANDIDATES + 1) / 2; + static_assert(max_candidates_per_warp <= 128); + constexpr unsigned N = (max_candidates_per_warp + (raft::warp_size() - 1)) / raft::warp_size(); + float key[N]; + IdxT val[N]; + if (warp_id < 2) { + /* Candidates -> Reg */ + for (unsigned i = 0; i < N; i++) { + unsigned jl = lane_id + (raft::warp_size() * i); + unsigned j = jl + (max_candidates_per_warp * warp_id); + if (j < num_candidates) { + key[i] = candidate_distances[j]; + val[i] = candidate_indices[j]; + } else { + key[i] = utils::get_max_value(); + val[i] = utils::get_max_value(); + } + } + /* Sort */ + bitonic::warp_sort(key, val); + /* Reg -> Temp_candidates */ + for (unsigned i = 0; i < N; i++) { + unsigned jl = (N * lane_id) + i; + unsigned j = jl + (max_candidates_per_warp * warp_id); + if (j < num_candidates && jl < num_itopk) { + candidate_distances[device::swizzling(j)] = key[i]; + candidate_indices[device::swizzling(j)] = val[i]; + } + } + } + __syncthreads(); + + unsigned num_warps_used = (num_itopk + max_candidates_per_warp - 1) / 
max_candidates_per_warp; + if (warp_id < num_warps_used) { + /* Temp_candidates -> Reg */ + for (unsigned i = 0; i < N; i++) { + unsigned jl = (N * lane_id) + i; + unsigned kl = max_candidates_per_warp - 1 - jl; + unsigned j = jl + (max_candidates_per_warp * warp_id); + unsigned k = MAX_CANDIDATES - 1 - j; + if (j >= num_candidates || k >= num_candidates || kl >= num_itopk) continue; + float temp_key = candidate_distances[device::swizzling(k)]; + if (key[i] == temp_key) continue; + if ((warp_id == 0) == (key[i] > temp_key)) { + key[i] = temp_key; + val[i] = candidate_indices[device::swizzling(k)]; + } + } + } + if (num_warps_used > 1) { __syncthreads(); } + if (warp_id < num_warps_used) { + /* Merge */ + bitonic::warp_merge(key, val, raft::warp_size()); + /* Reg -> Temp_itopk */ + for (unsigned i = 0; i < N; i++) { + unsigned jl = (N * lane_id) + i; + unsigned j = jl + (max_candidates_per_warp * warp_id); + if (j < num_candidates && j < num_itopk) { + candidate_distances[device::swizzling(j)] = key[i]; + candidate_indices[device::swizzling(j)] = val[i]; + } + } + } + if (num_warps_used > 1) { __syncthreads(); } + } +} + +// Wrapper functions to avoid pre-inlining (impacts register pressure) +RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_and_full_wrapper_64_false( + float* candidate_distances, // [num_candidates] + std::uint32_t* candidate_indices, // [num_candidates] + const std::uint32_t num_candidates, + const std::uint32_t num_itopk) +{ + topk_by_bitonic_sort_and_full<64, false, uint32_t>( + candidate_distances, candidate_indices, num_candidates, num_itopk); +} + +RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_and_full_wrapper_128_false( + float* candidate_distances, // [num_candidates] + std::uint32_t* candidate_indices, // [num_candidates] + const std::uint32_t num_candidates, + const std::uint32_t num_itopk) +{ + topk_by_bitonic_sort_and_full<128, false, uint32_t>( + candidate_distances, candidate_indices, num_candidates, num_itopk); +} + 
+RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_and_full_wrapper_256_false( + float* candidate_distances, // [num_candidates] + std::uint32_t* candidate_indices, // [num_candidates] + const std::uint32_t num_candidates, + const std::uint32_t num_itopk) +{ + topk_by_bitonic_sort_and_full<256, false, uint32_t>( + candidate_distances, candidate_indices, num_candidates, num_itopk); +} + +// TopK by bitonic sort and merge (template version with MAX_ITOPK) +template +RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_and_merge( + float* itopk_distances, // [num_itopk] + IdxT* itopk_indices, // [num_itopk] + const std::uint32_t num_itopk, + float* candidate_distances, // [num_candidates] + IdxT* candidate_indices, // [num_candidates] + const std::uint32_t num_candidates, + std::uint32_t* work_buf, + const bool first) +{ + const unsigned lane_id = threadIdx.x % raft::warp_size(); + const unsigned warp_id = threadIdx.x / raft::warp_size(); + + static_assert(MAX_ITOPK <= 512); + if constexpr (!MULTI_WARPS) { + static_assert(MAX_ITOPK <= 256); + if (warp_id > 0) { return; } + constexpr unsigned N = (MAX_ITOPK + (raft::warp_size() - 1)) / raft::warp_size(); + float key[N]; + IdxT val[N]; + if (first) { + /* Load itopk results */ + for (unsigned i = 0; i < N; i++) { + unsigned j = lane_id + (raft::warp_size() * i); + if (j < num_itopk) { + key[i] = itopk_distances[j]; + val[i] = itopk_indices[j]; + } else { + key[i] = utils::get_max_value(); + val[i] = utils::get_max_value(); + } + } + /* Warp Sort */ + bitonic::warp_sort(key, val); + } else { + /* Load itopk results */ + for (unsigned i = 0; i < N; i++) { + unsigned j = (N * lane_id) + i; + if (j < num_itopk) { + key[i] = itopk_distances[device::swizzling(j)]; + val[i] = itopk_indices[device::swizzling(j)]; + } else { + key[i] = utils::get_max_value(); + val[i] = utils::get_max_value(); + } + } + } + /* Merge candidates */ + for (unsigned i = 0; i < N; i++) { + unsigned j = (N * lane_id) + i; // [0:max_itopk-1] + 
 unsigned k = MAX_ITOPK - 1 - j; + if (k >= num_itopk || k >= num_candidates) continue; + float candidate_key = candidate_distances[device::swizzling(k)]; + if (key[i] > candidate_key) { + key[i] = candidate_key; + val[i] = candidate_indices[device::swizzling(k)]; + } + } + /* Warp Merge */ + bitonic::warp_merge(key, val, raft::warp_size()); + /* Store new itopk results */ + for (unsigned i = 0; i < N; i++) { + unsigned j = (N * lane_id) + i; + if (j < num_itopk) { + itopk_distances[device::swizzling(j)] = key[i]; + itopk_indices[device::swizzling(j)] = val[i]; + } + } + } else { + static_assert(MAX_ITOPK == 512); + assert(blockDim.x >= 64); + // Use two warps (64 threads) or more + constexpr unsigned max_itopk_per_warp = (MAX_ITOPK + 1) / 2; + constexpr unsigned N = (max_itopk_per_warp + (raft::warp_size() - 1)) / raft::warp_size(); + float key[N]; + IdxT val[N]; + if (first) { + /* Load itopk results (not sorted) */ + if (warp_id < 2) { + for (unsigned i = 0; i < N; i++) { + unsigned j = lane_id + (raft::warp_size() * i) + (max_itopk_per_warp * warp_id); + if (j < num_itopk) { + key[i] = itopk_distances[j]; + val[i] = itopk_indices[j]; + } else { + key[i] = utils::get_max_value(); + val[i] = utils::get_max_value(); + } + } + /* Warp Sort */ + bitonic::warp_sort(key, val); + /* Store intermediate results */ + for (unsigned i = 0; i < N; i++) { + unsigned j = (N * threadIdx.x) + i; + if (j >= num_itopk) continue; + itopk_distances[device::swizzling(j)] = key[i]; + itopk_indices[device::swizzling(j)] = val[i]; + } + } + __syncthreads(); + if (warp_id < 2) { + /* Load intermediate results */ + for (unsigned i = 0; i < N; i++) { + unsigned j = (N * threadIdx.x) + i; + unsigned k = MAX_ITOPK - 1 - j; + if (k >= num_itopk) continue; + float temp_key = itopk_distances[device::swizzling(k)]; + if (key[i] == temp_key) continue; + if ((warp_id == 0) == (key[i] > temp_key)) { + key[i] = temp_key; + val[i] = itopk_indices[device::swizzling(k)]; + } + } + /* Warp Merge */ + 
 bitonic::warp_merge(key, val, raft::warp_size()); + } + __syncthreads(); + /* Store itopk results (sorted) */ + if (warp_id < 2) { + for (unsigned i = 0; i < N; i++) { + unsigned j = (N * threadIdx.x) + i; + if (j >= num_itopk) continue; + itopk_distances[device::swizzling(j)] = key[i]; + itopk_indices[device::swizzling(j)] = val[i]; + } + } + } + const uint32_t num_itopk_div2 = num_itopk / 2; + if (threadIdx.x < 3) { + // work_buf is used to obtain turning points in 1st and 2nd half of itopk after merge. + work_buf[threadIdx.x] = num_itopk_div2; + } + __syncthreads(); + + // Merge candidates (using whole threads) + for (unsigned k = threadIdx.x; k < (num_candidates < num_itopk ? num_candidates : num_itopk); + k += blockDim.x) { + const unsigned j = num_itopk - 1 - k; + const float itopk_key = itopk_distances[device::swizzling(j)]; + const float candidate_key = candidate_distances[device::swizzling(k)]; + if (itopk_key > candidate_key) { + itopk_distances[device::swizzling(j)] = candidate_key; + itopk_indices[device::swizzling(j)] = candidate_indices[device::swizzling(k)]; + if (j < num_itopk_div2) { + atomicMin(work_buf + 2, j); + } else { + atomicMin(work_buf + 1, j - num_itopk_div2); + } + } + } + __syncthreads(); + + // Merge 1st and 2nd half of itopk (using whole threads) + for (unsigned j = threadIdx.x; j < num_itopk_div2; j += blockDim.x) { + const unsigned k = j + num_itopk_div2; + float key_0 = itopk_distances[device::swizzling(j)]; + float key_1 = itopk_distances[device::swizzling(k)]; + if (key_0 > key_1) { + itopk_distances[device::swizzling(j)] = key_1; + itopk_distances[device::swizzling(k)] = key_0; + IdxT val_0 = itopk_indices[device::swizzling(j)]; + IdxT val_1 = itopk_indices[device::swizzling(k)]; + itopk_indices[device::swizzling(j)] = val_1; + itopk_indices[device::swizzling(k)] = val_0; + atomicMin(work_buf + 0, j); + } + } + if (threadIdx.x == blockDim.x - 1) { + if (work_buf[2] < num_itopk_div2) { work_buf[1] = work_buf[2]; } + } + 
 __syncthreads(); + // Warp-0 merges 1st half of itopk, warp-1 does 2nd half. + if (warp_id < 2) { + // Load intermediate itopk results + const uint32_t turning_point = work_buf[warp_id]; // turning_point <= num_itopk_div2 + for (unsigned i = 0; i < N; i++) { + unsigned k = num_itopk; + unsigned j = (N * lane_id) + i; + if (j < turning_point) { + k = j + (num_itopk_div2 * warp_id); + } else if (j >= (MAX_ITOPK / 2 - num_itopk_div2)) { + j -= (MAX_ITOPK / 2 - num_itopk_div2); + if ((turning_point <= j) && (j < num_itopk_div2)) { k = j + (num_itopk_div2 * warp_id); } + } + if (k < num_itopk) { + key[i] = itopk_distances[device::swizzling(k)]; + val[i] = itopk_indices[device::swizzling(k)]; + } else { + key[i] = utils::get_max_value(); + val[i] = utils::get_max_value(); + } + } + /* Warp Merge */ + bitonic::warp_merge(key, val, raft::warp_size()); + /* Store new itopk results */ + for (unsigned i = 0; i < N; i++) { + const unsigned j = (N * lane_id) + i; + if (j < num_itopk_div2) { + unsigned k = j + (num_itopk_div2 * warp_id); + itopk_distances[device::swizzling(k)] = key[i]; + itopk_indices[device::swizzling(k)] = val[i]; + } + } + } + } +} + +// Wrapper functions to avoid pre-inlining +RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_and_merge_wrapper_64_false( + float* itopk_distances, // [num_itopk] + uint32_t* itopk_indices, // [num_itopk] + const std::uint32_t num_itopk, + float* candidate_distances, // [num_candidates] + uint32_t* candidate_indices, // [num_candidates] + const std::uint32_t num_candidates, + std::uint32_t* work_buf, + const bool first) +{ + topk_by_bitonic_sort_and_merge<64, false, uint32_t>(itopk_distances, + itopk_indices, + num_itopk, + candidate_distances, + candidate_indices, + num_candidates, + work_buf, + first); +} + +RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_and_merge_wrapper_128_false( + float* itopk_distances, // [num_itopk] + uint32_t* itopk_indices, // [num_itopk] + const std::uint32_t num_itopk, + float* 
candidate_distances, // [num_candidates] + uint32_t* candidate_indices, // [num_candidates] + const std::uint32_t num_candidates, + std::uint32_t* work_buf, + const bool first) +{ + topk_by_bitonic_sort_and_merge<128, false, uint32_t>(itopk_distances, + itopk_indices, + num_itopk, + candidate_distances, + candidate_indices, + num_candidates, + work_buf, + first); +} + +RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_and_merge_wrapper_256_false( + float* itopk_distances, // [num_itopk] + uint32_t* itopk_indices, // [num_itopk] + const std::uint32_t num_itopk, + float* candidate_distances, // [num_candidates] + uint32_t* candidate_indices, // [num_candidates] + const std::uint32_t num_candidates, + std::uint32_t* work_buf, + const bool first) +{ + topk_by_bitonic_sort_and_merge<256, false, uint32_t>(itopk_distances, + itopk_indices, + num_itopk, + candidate_distances, + candidate_indices, + num_candidates, + work_buf, + first); +} + +// TopK by bitonic sort and merge (runtime version) +template +RAFT_DEVICE_INLINE_FUNCTION void topk_by_bitonic_sort_and_merge( + float* itopk_distances, // [num_itopk] + IdxT* itopk_indices, // [num_itopk] + const std::uint32_t max_itopk, + const std::uint32_t num_itopk, + float* candidate_distances, // [num_candidates] + IdxT* candidate_indices, // [num_candidates] + const std::uint32_t max_candidates, + const std::uint32_t num_candidates, + std::uint32_t* work_buf, + const bool first) +{ + static_assert(std::is_same_v); + assert(max_itopk <= 512); + assert(max_candidates <= 256); + assert(!MULTI_WARPS || blockDim.x >= 64); + + // use a non-template wrapper function to avoid pre-inlining the topk_by_bitonic_sort_and_full + // function (vs post-inlining, this impacts register pressure) + if (max_candidates <= 64) { + topk_by_bitonic_sort_and_full_wrapper_64_false( + candidate_distances, candidate_indices, num_candidates, num_itopk); + } else if (max_candidates <= 128) { + topk_by_bitonic_sort_and_full_wrapper_128_false( + 
candidate_distances, candidate_indices, num_candidates, num_itopk); + } else { + topk_by_bitonic_sort_and_full_wrapper_256_false( + candidate_distances, candidate_indices, num_candidates, num_itopk); + } + + if constexpr (!MULTI_WARPS) { + assert(max_itopk <= 256); + // use a non-template wrapper function to avoid pre-inlining the topk_by_bitonic_sort_and_merge + // function (vs post-inlining, this impacts register pressure) + if (max_itopk <= 64) { + topk_by_bitonic_sort_and_merge_wrapper_64_false(itopk_distances, + itopk_indices, + num_itopk, + candidate_distances, + candidate_indices, + num_candidates, + work_buf, + first); + } else if (max_itopk <= 128) { + topk_by_bitonic_sort_and_merge_wrapper_128_false(itopk_distances, + itopk_indices, + num_itopk, + candidate_distances, + candidate_indices, + num_candidates, + work_buf, + first); + } else { + topk_by_bitonic_sort_and_merge_wrapper_256_false(itopk_distances, + itopk_indices, + num_itopk, + candidate_distances, + candidate_indices, + num_candidates, + work_buf, + first); + } + } else { + assert(max_itopk > 256); + topk_by_bitonic_sort_and_merge<512, MULTI_WARPS, uint32_t>(itopk_distances, + itopk_indices, + num_itopk, + candidate_distances, + candidate_indices, + num_candidates, + work_buf, + first); + } +} + +// This function move the invalid index element to the end of the itopk list. +// Require : array_length % 32 == 0 && The invalid entry is only one. 
+template +RAFT_DEVICE_INLINE_FUNCTION void move_invalid_to_end_of_list(IdxT* const index_array, + float* const distance_array, + const std::uint32_t array_length) +{ + constexpr std::uint32_t warp_size = 32; + constexpr std::uint32_t invalid_index = utils::get_max_value(); + const std::uint32_t lane_id = threadIdx.x % warp_size; + + if (threadIdx.x >= warp_size) { return; } + + bool found_invalid = false; + if (array_length % warp_size == 0) { + for (std::uint32_t i = lane_id; i < array_length; i += warp_size) { + const auto index = index_array[i]; + const auto distance = distance_array[i]; + + if (found_invalid) { + index_array[i - 1] = index; + distance_array[i - 1] = distance; + } else { + // Check if the index is invalid + const auto I_found_invalid = (index == invalid_index); + const auto who_has_invalid = raft::ballot(I_found_invalid); + // if a value that is loaded by a smaller lane id thread, shift the array + if (who_has_invalid << (warp_size - lane_id)) { + index_array[i - 1] = index; + distance_array[i - 1] = distance; + } + + found_invalid = who_has_invalid; + } + } + } + if (lane_id == 0) { + index_array[array_length - 1] = invalid_index; + distance_array[array_length - 1] = utils::get_max_value(); + } +} + +template +RAFT_DEVICE_INLINE_FUNCTION void hashmap_restore(INDEX_T* const hashmap_ptr, + const size_t hashmap_bitlen, + const INDEX_T* itopk_indices, + const uint32_t itopk_size, + const uint32_t first_tid = 0) +{ + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + if (threadIdx.x < first_tid) return; + for (unsigned i = threadIdx.x - first_tid; i < itopk_size; i += blockDim.x - first_tid) { + auto key = itopk_indices[i] & ~index_msb_1_mask; // clear most significant bit + hashmap::insert(hashmap_ptr, hashmap_bitlen, key); + } +} + +} // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh index fcc851b3b5..9d21c13d3f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh @@ -5,60 +5,30 @@ #pragma once +// Device-only helpers - extracted from search_single_cta_kernel-inl.cuh to avoid host-side includes +#include "search_single_cta_device_helpers.cuh" + +// Additional device-side includes needed #include "../compute_distance-ext.cuh" -#include "../compute_distance_standard-impl.cuh" -#include "../compute_distance_vpq-impl.cuh" #include "../device_common.hpp" #include "../hashmap.hpp" -#include "../search_single_cta_kernel-inl.cuh" -#include "../utils.hpp" - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "../bitonic.hpp" -#include "../search_plan.cuh" #include "../topk_by_radix.cuh" -#include "../topk_for_cagra/topk.h" - -#include - -#include -#include -#include -#include +#include "../utils.hpp" -#include -#include +#include // For raft::shfl_xor +#include // For raft::round_up_safe #include -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include // For std::is_same_v -#include +#include // For assert() + +#ifdef _CLK_BREAKDOWN +#include // For printf() in debug mode +#endif // Include extern function declarations before namespace so they're available to kernel definitions #include "../../jit_lto_kernels/filter_data.h" From e9e2ff04c308b81a1f3e9a69ce6f42eff34a0fc8 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 4 Mar 2026 06:06:29 +0000 Subject: [PATCH 145/158] Don't build fatbins with debug symbols --- cpp/CMakeLists.txt | 13 +++++++++---- cpp/bench/ann/CMakeLists.txt | 8 +++++--- cpp/cmake/modules/ConfigureCUDA.cmake | 9 +++------ cpp/tests/CMakeLists.txt | 6 ++++-- 4 files changed, 21 
insertions(+), 15 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8c33623615..f1d66258aa 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -335,8 +335,10 @@ if(NOT BUILD_CPU_ONLY) $ ) target_compile_options( - cuvs-cagra-search PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" + cuvs-cagra-search + PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + "$<$,$>:${CUVS_DEBUG_CUDA_FLAGS}>" ) if(BUILD_MG_ALGOS) @@ -818,8 +820,10 @@ if(NOT BUILD_CPU_ONLY) POSITION_INDEPENDENT_CODE ON ) target_compile_options( - cuvs_objs PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" + cuvs_objs + PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + "$<$,$>:${CUVS_DEBUG_CUDA_FLAGS}>" ) target_compile_definitions( @@ -899,6 +903,7 @@ if(NOT BUILD_CPU_ONLY) --expt-relaxed-constexpr> PRIVATE "$<$:${CUVS_CXX_FLAGS}>" "$<$:${CUVS_CUDA_FLAGS}>" + "$<$,$>:${CUVS_DEBUG_CUDA_FLAGS}>" ) target_compile_definitions( cuvs diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 8d254c0933..4e4527267c 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -190,8 +190,10 @@ function(ConfigureAnnBench) set(${ConfigureAnnBench_CXXFLAGS} ${CUVS_CXX_FLAGS} ${ConfigureAnnBench_CXXFLAGS}) target_compile_options( - ${BENCH_NAME} PRIVATE "$<$:${ConfigureAnnBench_CXXFLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" + ${BENCH_NAME} + PRIVATE "$<$:${ConfigureAnnBench_CXXFLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + "$<$,$>:${CUVS_DEBUG_CUDA_FLAGS}>" ) if(CUVS_ANN_BENCH_USE_${ConfigureAnnBench_NAME}) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index c6d51b1db0..0b50d78707 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -55,8 +55,5 @@ if(OpenMP_FOUND) endif() # Debug options -if(CMAKE_BUILD_TYPE MATCHES Debug) - message(VERBOSE "cuVS: Building with debugging flags") - list(APPEND CUVS_CUDA_FLAGS -G -Xcompiler=-rdynamic --maxrregcount=64) - list(APPEND CUVS_CUDA_FLAGS -Xptxas --suppress-stack-size-warning) -endif() +list(APPEND CUVS_DEBUG_CUDA_FLAGS -G -Xcompiler=-rdynamic --maxrregcount=64) +list(APPEND CUVS_DEBUG_CUDA_FLAGS -Xptxas --suppress-stack-size-warning) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 164f0599c1..2fdf0b748f 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -61,8 +61,10 @@ function(ConfigureTest) CUDA_STANDARD_REQUIRED ON ) target_compile_options( - ${TEST_NAME} PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" + ${TEST_NAME} + PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + "$<$,$>:${CUVS_DEBUG_CUDA_FLAGS}>" ) if(_CUVS_TEST_NOCUDA) From 582d6a0e21d5c0f21a2db5a7564a96fb9c220b6f Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 6 Mar 2026 20:57:06 +0000 Subject: [PATCH 146/158] unpin raft --- cpp/cmake/thirdparty/get_raft.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index ba7f68f09d..951cec16d9 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -60,8 +60,8 @@ endfunction() # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${RAFT_VERSION}.00 - FORK divyegala - PINNED_TAG unneeded-cccl-includes +FORK ${RAFT_FORK} +PINNED_TAG ${RAFT_PINNED_TAG} ENABLE_MNMG_DEPENDENCIES OFF ENABLE_NVTX OFF BUILD_STATIC_DEPS ${CUVS_STATIC_RAPIDS_LIBRARIES} From 98a1dced405fae5500790c426039283de7179201 Mon Sep 17 00:00:00 2001 From: Divye Gala 
Date: Fri, 6 Mar 2026 16:01:08 -0500 Subject: [PATCH 147/158] Update cpp/cmake/thirdparty/get_raft.cmake Co-authored-by: Kyle Edwards --- cpp/cmake/thirdparty/get_raft.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 951cec16d9..505575ec4c 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -60,8 +60,8 @@ endfunction() # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${RAFT_VERSION}.00 -FORK ${RAFT_FORK} -PINNED_TAG ${RAFT_PINNED_TAG} + FORK ${RAFT_FORK} + PINNED_TAG ${RAFT_PINNED_TAG} ENABLE_MNMG_DEPENDENCIES OFF ENABLE_NVTX OFF BUILD_STATIC_DEPS ${CUVS_STATIC_RAPIDS_LIBRARIES} From a39c150770923b25110f318699928dec8817398b Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 6 Mar 2026 16:06:41 -0500 Subject: [PATCH 148/158] Update cpp/cmake/thirdparty/get_raft.cmake Co-authored-by: Kyle Edwards --- cpp/cmake/thirdparty/get_raft.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 505575ec4c..8ecf3686be 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on From 33e1bc59e9e6e613c4acf7350be0e7432274a656 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 9 Mar 2026 23:10:31 +0000 Subject: [PATCH 149/158] Add L1 dist op --- .../jit_lto_kernels/cagra_planner_base.hpp | 1 + .../cagra/jit_lto_kernels/dist_op_l1_impl.cuh | 19 +++++++++++++++++++ .../cagra/jit_lto_kernels/dist_op_matrix.json | 5 +++++ 3 files changed, 25 insertions(+) create mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l1_impl.cuh diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp index 908a27046d..8f41555b4d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/cagra_planner_base.hpp @@ -85,6 +85,7 @@ struct CagraPlannerBase : AlgorithmPlanner { case cuvs::distance::DistanceType::InnerProduct: metric_tag = "inner_product"; break; case cuvs::distance::DistanceType::CosineExpanded: metric_tag = "inner_product"; break; case cuvs::distance::DistanceType::BitwiseHamming: metric_tag = "hamming"; break; + case cuvs::distance::DistanceType::L1: metric_tag = "l1"; break; default: metric_tag = "unknown"; break; } auto params = make_fragment_key(); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l1_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l1_impl.cuh new file mode 100644 index 0000000000..cf748facf8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l1_impl.cuh @@ -0,0 +1,19 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include + +namespace cuvs::neighbors::cagra::detail { + +template +__device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) +{ + DISTANCE_T diff = a - b; + return raft::abs(diff * diff); +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json index 7f0772ab1f..7e7f6333d1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_matrix.json @@ -14,6 +14,11 @@ "metric_tag": "hamming", "query_type": "uint8_t", "query_abbrev": "uc" + }, + { + "metric_tag": "l1", + "query_type": "float", + "query_abbrev": "f" } ], "_distance": [ From f050b77efd6a381c20537c2d870c9e2eed74e87d Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 10 Mar 2026 21:16:40 +0000 Subject: [PATCH 150/158] Fix L1 distance --- .../neighbors/detail/cagra/jit_lto_kernels/dist_op_l1_impl.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l1_impl.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l1_impl.cuh index cf748facf8..693a84fddd 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l1_impl.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/dist_op_l1_impl.cuh @@ -13,7 +13,7 @@ template __device__ DISTANCE_T dist_op(QUERY_T a, QUERY_T b) { DISTANCE_T diff = a - b; - return raft::abs(diff * diff); + return raft::abs(diff); } } // namespace cuvs::neighbors::cagra::detail From d6eec0a5f092c95244913c6fbee4c93430475b20 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 11 Mar 2026 16:26:22 +0000 Subject: [PATCH 151/158] Explicitly install cudart --- dependencies.yaml | 8 ++++---- python/libcuvs/pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dependencies.yaml 
b/dependencies.yaml index 17bb112b74..676a6b2a83 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -344,12 +344,12 @@ dependencies: cuda: "12.*" use_cuda_wheels: "true" packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink,nvrtc]==12.* + - cuda-toolkit[cublas,cudart,curand,cusolver,cusparse,nvjitlink,nvrtc]==12.* - matrix: cuda: "13.*" use_cuda_wheels: "true" packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink,nvrtc]==13.* + - cuda-toolkit[cublas,cudart,curand,cusolver,cusparse,nvjitlink,nvrtc]==13.* - matrix: use_cuda_wheels: "false" packages: @@ -357,7 +357,7 @@ dependencies: # (just as a source of documentation, as this populates pyproject.toml in source control) - matrix: packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink,nvrtc]>=12,<14 + - cuda-toolkit[cublas,cudart,curand,cusolver,cusparse,nvjitlink,nvrtc]>=12,<14 depends_on_cupy: common: - output_types: conda @@ -372,7 +372,7 @@ dependencies: - matrix: cuda: "12.*" packages: - - cupy-cuda12x>=13.6.0,<14.0 + - cupy-cuda12x>=13.6.0 # fallback to CUDA 13 versions if 'cuda' is '13.*' or not provided - matrix: packages: diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml index c7e0a57515..96dbe046b4 100644 --- a/python/libcuvs/pyproject.toml +++ b/python/libcuvs/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = "Apache-2.0" requires-python = ">=3.11" dependencies = [ - "cuda-toolkit[cublas,curand,cusolver,cusparse,nvjitlink,nvrtc]>=12,<14", + "cuda-toolkit[cublas,cudart,curand,cusolver,cusparse,nvjitlink,nvrtc]>=12,<14", "libraft==26.4.*,>=0.0.0a0", "librmm==26.4.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
From 1f3b75b0bde7dac3261af7172bce166ada087d08 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 12 Mar 2026 01:51:32 +0000 Subject: [PATCH 152/158] use function ptr indirection --- cpp/CMakeLists.txt | 13 +- .../cuvs/detail/jit_lto/AlgorithmLauncher.hpp | 1 + .../detail/cagra/compute_distance.hpp | 2 - cpp/src/neighbors/detail/cagra/factory.cuh | 3 +- .../compute_distance_kernel.cu.in | 5 + ...te_distance_to_child_nodes_embedded.cpp.in | 4 +- ...mpute_distance_to_child_nodes_kernel.cu.in | 2 +- ...ompute_distance_to_child_nodes_matrix.json | 87 +------------ .../compute_distance_vpq_impl_unified.cuh | 48 ------- .../jit_lto_kernels/device_common_jit.cuh | 102 +++------------ .../random_pickup_embedded.cpp.in | 4 +- .../random_pickup_kernel.cu.in | 2 +- .../jit_lto_kernels/random_pickup_matrix.json | 87 ++----------- .../search_multi_cta_embedded.cpp.in | 5 +- .../jit_lto_kernels/search_multi_cta_jit.cuh | 99 +++++---------- .../search_multi_cta_kernel.cu.in | 2 +- .../search_multi_cta_matrix.json | 87 +------------ .../search_multi_cta_planner.hpp | 24 +--- .../jit_lto_kernels/search_multi_jit.cuh | 96 ++------------ .../search_multi_kernel_planner.hpp | 33 +---- .../search_single_cta_embedded.cpp.in | 4 +- .../jit_lto_kernels/search_single_cta_jit.cuh | 118 +++++------------- .../search_single_cta_kernel.cu.in | 2 +- .../search_single_cta_matrix.json | 107 +--------------- .../search_single_cta_p_embedded.cpp.in | 4 +- .../search_single_cta_p_kernel.cu.in | 2 +- .../search_single_cta_p_matrix.json | 107 +--------------- .../search_single_cta_planner.hpp | 35 +----- .../setup_workspace_kernel.cu.in | 6 + .../search_multi_cta_kernel_launcher_jit.cuh | 31 ++++- .../search_multi_kernel_launcher_jit.cuh | 55 ++++++++ .../search_single_cta_kernel_launcher_jit.cuh | 57 +++++++++ 32 files changed, 289 insertions(+), 945 deletions(-) delete mode 100644 cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh diff --git a/cpp/CMakeLists.txt 
b/cpp/CMakeLists.txt index 8f3b7186e9..cc5590036a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -497,7 +497,7 @@ if(NOT BUILD_CPU_ONLY) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + "cagra_search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json" KERNEL_INPUT_FILE @@ -510,7 +510,7 @@ if(NOT BUILD_CPU_ONLY) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + "cagra_search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json" KERNEL_INPUT_FILE @@ -522,8 +522,7 @@ if(NOT BUILD_CPU_ONLY) ) generate_jit_lto_kernels( jit_lto_kernel_files - NAME_FORMAT - "cagra_search_multi_cta@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + NAME_FORMAT "cagra_search_multi_cta_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json" KERNEL_INPUT_FILE @@ -535,8 +534,7 @@ if(NOT BUILD_CPU_ONLY) ) generate_jit_lto_kernels( jit_lto_kernel_files - NAME_FORMAT - 
"cagra_random_pickup@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + NAME_FORMAT "cagra_random_pickup_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json" KERNEL_INPUT_FILE @@ -548,8 +546,7 @@ if(NOT BUILD_CPU_ONLY) ) generate_jit_lto_kernels( jit_lto_kernel_files - NAME_FORMAT - "cagra_compute_distance_to_child_nodes@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@_data_@data_abbrev@_query_@query_abbrev@" + NAME_FORMAT "cagra_compute_distance_to_child_nodes_data_@data_abbrev@_query_@query_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json" KERNEL_INPUT_FILE diff --git a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp index 6f551170c4..ceea50f2af 100644 --- a/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp +++ b/cpp/include/cuvs/detail/jit_lto/AlgorithmLauncher.hpp @@ -42,6 +42,7 @@ struct AlgorithmLauncher { this->call_cooperative(stream, grid, block, shared_mem, kernel_args); } + cudaLibrary_t get_library() { return this->library; } cudaKernel_t get_kernel() { return this->kernel; } private: diff --git a/cpp/src/neighbors/detail/cagra/compute_distance.hpp b/cpp/src/neighbors/detail/cagra/compute_distance.hpp index 2466795514..1548a61761 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/src/neighbors/detail/cagra/compute_distance.hpp @@ -192,7 +192,6 @@ struct alignas(device::LOAD_128BIT_T) dataset_descriptor_base_t { return smem_and_team_size.team_size(); } -#if !defined(CUVS_ENABLE_JIT_LTO) && !defined(BUILD_KERNEL) RAFT_DEVICE_INLINE_FUNCTION auto setup_workspace(void* smem_ptr, const DATA_T* queries_ptr, uint32_t query_id) const -> const base_type* @@ -206,7 +205,6 @@ 
struct alignas(device::LOAD_128BIT_T) dataset_descriptor_base_t { auto per_thread_distances = valid ? compute_distance_impl(args.load(), dataset_index) : 0; return device::team_sum(per_thread_distances, team_size_bitshift_from_smem()); } -#endif }; /** diff --git a/cpp/src/neighbors/detail/cagra/factory.cuh b/cpp/src/neighbors/detail/cagra/factory.cuh index a767d16530..76f7d451d6 100644 --- a/cpp/src/neighbors/detail/cagra/factory.cuh +++ b/cpp/src/neighbors/detail/cagra/factory.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -11,6 +11,7 @@ #include "search_plan.cuh" #include "search_single_cta.cuh" +#include #include namespace cuvs::neighbors::cagra::detail { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in index 6163dce4ac..8be736b753 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in @@ -12,3 +12,8 @@ template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block const args_t, @index_type@); } // namespace cuvs::neighbors::cagra::detail + +extern "C" { +__device__ @distance_type@ (*compute_distance_ptr)(const typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t, @index_type@) = + &cuvs::neighbors::cagra::detail::compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; +} diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in index 
b28a89f667..efcaca8b32 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in @@ -17,8 +17,8 @@ __attribute__((__constructor__)) void register_kernel() tag_idx_@index_abbrev@, tag_dist_@distance_abbrev@, tag_@query_abbrev@, - tag_idx_@source_index_abbrev@@codebook_comma@ @codebook_tag@>( - "compute_distance_to_child_nodes@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + tag_idx_@source_index_abbrev@>( + "compute_distance_to_child_nodes", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in index 1a1baed5e7..42973e55b6 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_kernel.cu.in @@ -7,7 +7,7 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -template __global__ void compute_distance_to_child_nodes_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@, cuvs::neighbors::filtering::none_sample_filter>( +template __global__ void compute_distance_to_child_nodes_kernel_jit<@data_type@, @index_type@, @distance_type@, @source_index_type@, cuvs::neighbors::filtering::none_sample_filter>( const @index_type@* const, @index_type@* const, @distance_type@* const, const std::size_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const @data_type@*, @index_type@* const, const std::uint32_t, 
@index_type@* const, @distance_type@* const, const std::uint32_t, cuvs::neighbors::filtering::none_sample_filter); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json index 929165330b..c798957de1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json @@ -64,16 +64,8 @@ "distance_abbrev": "f" } ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], + "team_size": ["32"], + "dataset_block_dim": ["256"], "_pq": [ { "pq_bits": "0", @@ -89,80 +81,5 @@ "codebook_comma": "" } ] - }, - { - "_data": [ - { - "data_type": "float", - "data_abbrev": "f" - }, - { - "data_type": "__half", - "data_abbrev": "h" - }, - { - "data_type": "uint8_t", - "data_abbrev": "uc" - }, - { - "data_type": "int8_t", - "data_abbrev": "sc" - } - ], - "_query": [ - { - "query_type": "half", - "query_abbrev": "h" - } - ], - "_source_index": [ - { - "source_index_type": "uint32_t", - "source_index_abbrev": "ui" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } - ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], - "_pq": [ - { - "pq_bits": "8", - "pq_len": "2", - "pq_prefix": "_vpq", - "pq_suffix": "_8pq_2subd" - }, - { - "pq_bits": "8", - "pq_len": "4", - "pq_prefix": "_vpq", - "pq_suffix": "_8pq_4subd" - } - ], - "_codebook": [ - { - "codebook_type": "half", - "codebook_tag": "tag_codebook_half", - "codebook_comma": ", " - } - ] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh deleted file mode 100644 index e21d48a2f1..0000000000 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_vpq_impl_unified.cuh +++ /dev/null @@ -1,48 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "../compute_distance_vpq-impl.cuh" -#include "../device_common.hpp" // For dataset_descriptor_base_t - -namespace cuvs::neighbors::cagra::detail { - -// Unified compute_distance implementation for VPQ descriptors -// This is instantiated when PQ_BITS>0, PQ_LEN>0, CodebookT=half -// QueryT is always half for VPQ -template -__device__ DistanceT -compute_distance(const typename dataset_descriptor_base_t::args_t args, - IndexT dataset_index) -{ - // For VPQ descriptors, PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half - static_assert( - PQ_BITS > 0 && PQ_LEN > 0 && std::is_same_v && std::is_same_v, - "VPQ descriptor requires PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half"); - - // Reconstruct the descriptor type and call compute_distance_vpq - // QueryT is always half for VPQ - using desc_t = cagra_q_dataset_descriptor_t; - return compute_distance_vpq(args, dataset_index); -} - -} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh index 983fe93fe3..f89eaefc27 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh @@ -32,20 +32,11 @@ inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; // JIT version of compute_distance_to_random_nodes - uses dataset_descriptor_base_t* pointer // Shared between single_cta and multi_cta JIT kernels -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, 
CodebookT, QueryT -template +template RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( IndexT* __restrict__ result_indices_ptr, // [num_pickup] DistanceT* __restrict__ result_distances_ptr, // [num_pickup] - dataset_descriptor_base_t* smem_desc, + const dataset_descriptor_base_t* smem_desc, const uint32_t num_pickup, const uint32_t num_distilation, const uint64_t rand_xor_mask, @@ -60,13 +51,8 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( { constexpr unsigned warp_size = 32; - // Get team_size_bits and args directly from base descriptor - using args_t = typename cuvs::neighbors::cagra::detail:: - dataset_descriptor_base_t::args_t; - // Use team_size_bitshift_from_smem since smem_desc is in shared memory uint32_t team_size_bits = smem_desc->team_size_bitshift_from_smem(); - args_t args = smem_desc->args.load(); IndexT dataset_size = smem_desc->size; const auto max_i = raft::round_up_safe(num_pickup, warp_size >> team_size_bits); @@ -88,27 +74,10 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( } } - // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum - // Otherwise warp shuffles will hang. Each thread calls the unified extern function to get - // its per-thread distance, then team_sum reduces across all threads in the team. 
- DistanceT per_thread_norm2 = 0; - if (valid_i) { - // Use unified compute_distance function (links standard or VPQ fragment at runtime) - per_thread_norm2 = compute_distance(args, seed_index); - } - // Now ALL threads in the team participate in team_sum - const auto norm2_sum = device::team_sum(per_thread_norm2, team_size_bits); + const auto norm2 = smem_desc->compute_distance(seed_index, valid_i); - if (valid_i && (norm2_sum < best_norm2_team_local)) { - best_norm2_team_local = norm2_sum; + if (valid_i && (norm2 < best_norm2_team_local)) { + best_norm2_team_local = norm2; best_index_team_local = seed_index; } } @@ -136,21 +105,11 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( // JIT version of compute_distance_to_child_nodes - uses dataset_descriptor_base_t* pointer // Shared between single_cta and multi_cta JIT kernels -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT -template +template RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( IndexT* __restrict__ result_child_indices_ptr, DistanceT* __restrict__ result_child_distances_ptr, - dataset_descriptor_base_t* smem_desc, + const dataset_descriptor_base_t* smem_desc, const IndexT* __restrict__ knn_graph, const uint32_t knn_k, IndexT* __restrict__ visited_hashmap_ptr, @@ -193,49 +152,26 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( } __syncthreads(); - // Compute the distance to child nodes using unified extern compute_distance + // Compute the distance to child nodes - same inline pattern as non-JIT (device_common.hpp) constexpr unsigned warp_size = 32; - // Get team_size_bits and args directly from base descriptor - using args_t = typename cuvs::neighbors::cagra::detail:: - dataset_descriptor_base_t::args_t; - - // Use team_size_bitshift_from_smem since smem_desc is in shared memory - uint32_t team_size_bits = smem_desc->team_size_bitshift_from_smem(); - args_t args = smem_desc->args.load(); 
- - const auto num_k = knn_k * search_width; - const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bits); - const bool lead_lane = (threadIdx.x & ((1u << team_size_bits) - 1u)) == 0; - const uint32_t ofst = STATIC_RESULT_POSITION ? 0 : result_position[0]; + const auto team_size_bits = smem_desc->team_size_bitshift_from_smem(); + const auto num_k = knn_k * search_width; + const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bits); + const auto compute_distance = smem_desc->compute_distance_impl; + const auto args = smem_desc->args.load(); + const bool lead_lane = (threadIdx.x & ((1u << team_size_bits) - 1u)) == 0; + const uint32_t ofst = STATIC_RESULT_POSITION ? 0 : result_position[0]; for (uint32_t i = threadIdx.x >> team_size_bits; i < max_i; i += blockDim.x >> team_size_bits) { const auto j = i + ofst; const bool valid_i = STATIC_RESULT_POSITION ? (j < num_k) : (j < max_result_position); const auto child_id = valid_i ? result_child_indices_ptr[j] : invalid_index; - // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum - // Otherwise warp shuffles will hang. Each thread calls the unified extern function to get - // its per-thread distance, then team_sum reduces across all threads in the team. - DistanceT per_thread_dist = 0; - if (child_id != invalid_index) { - // Use unified compute_distance function (links standard or VPQ fragment at runtime) - per_thread_dist = compute_distance(args, child_id); - } else { - // Invalid child_id: lead lane gets upper_bound, others get 0 - per_thread_dist = lead_lane ? raft::upper_bound() : 0; - } - - // Now ALL threads in the team participate in team_sum - DistanceT child_dist = device::team_sum(per_thread_dist, team_size_bits); + const DistanceT child_dist = device::team_sum( + (child_id != invalid_index) ? compute_distance(args, child_id) + : (lead_lane ? 
raft::upper_bound() : 0), + team_size_bits); __syncwarp(); // Store the distance diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in index 49d89f6416..70783711ee 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in @@ -17,8 +17,8 @@ __attribute__((__constructor__)) void register_kernel() tag_idx_@index_abbrev@, tag_dist_@distance_abbrev@, tag_@query_abbrev@, - tag_idx_@index_abbrev@@codebook_comma@ @codebook_tag@>( - "random_pickup@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + tag_idx_@source_index_abbrev@>( + "random_pickup", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in index d5424b780b..2c13fd433d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_kernel.cu.in @@ -7,7 +7,7 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -template __global__ void random_pickup_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( +template __global__ void random_pickup_kernel_jit<@data_type@, @index_type@, @distance_type@>( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const std::size_t, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, @index_type@* const, @distance_type@* const, const std::uint32_t, @index_type@* const, const std::uint32_t); } // namespace cuvs::neighbors::cagra::detail::multi_kernel_search diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json index 3c014f8580..c798957de1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json @@ -46,6 +46,12 @@ ] } ], + "_source_index": [ + { + "source_index_type": "uint32_t", + "source_index_abbrev": "ui" + } + ], "_index": [ { "index_type": "uint32_t", @@ -58,16 +64,8 @@ "distance_abbrev": "f" } ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], + "team_size": ["32"], + "dataset_block_dim": ["256"], "_pq": [ { "pq_bits": "0", @@ -83,74 +81,5 @@ "codebook_comma": "" } ] - }, - { - "_data": [ - { - "data_type": "float", - "data_abbrev": "f" - }, - { - "data_type": "__half", - "data_abbrev": "h" - }, - { - "data_type": "uint8_t", - "data_abbrev": "uc" - }, - { - "data_type": "int8_t", - "data_abbrev": "sc" - } - ], - "_query": [ - { - "query_type": "half", - "query_abbrev": "h" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } - ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], - "_pq": [ - { - "pq_bits": "8", - "pq_len": "2", - "pq_prefix": "_vpq", - "pq_suffix": "_8pq_2subd" - }, - { - "pq_bits": "8", - "pq_len": "4", - "pq_prefix": "_vpq", - "pq_suffix": "_8pq_4subd" - } - ], - "_codebook": [ - { - "codebook_type": "half", - "codebook_tag": "tag_codebook_half", - "codebook_comma": ", " - } - ] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in index a70e22c696..09af166223 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in +++ 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_embedded.cpp.in @@ -16,9 +16,8 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( - "search_multi_cta@pq_prefix@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + tag_idx_@source_index_abbrev@>( + "search_multi_cta", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh index 2f7cbc1c42..ca4c9438d5 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh @@ -28,16 +28,7 @@ using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_ji using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v; using cuvs::neighbors::detail::sample_filter; -template +template __global__ __launch_bounds__(1024, 1) void search_kernel_jit( IndexT* const result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] DistanceT* const result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] @@ -107,19 +98,8 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( uint32_t dim = dataset_desc->args.dim; uint32_t smem_ws_size_in_bytes = dataset_desc->smem_ws_size_in_bytes(); - // Set smem working buffer using unified setup_workspace - // setup_workspace copies the descriptor to shared memory and returns base pointer to smem - // descriptor - dataset_descriptor_base_t* smem_desc = - setup_workspace(dataset_desc, smem, queries_ptr, query_id); + // Set smem working buffer using descriptor->setup_workspace (JIT symbols patched by launcher) + auto* smem_desc = dataset_desc->setup_workspace(smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + 
smem_ws_size_in_bytes); @@ -151,28 +131,20 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( uint32_t block_id = cta_id + (num_cta_per_query * query_id); uint32_t num_blocks = num_cta_per_query * num_queries; - compute_distance_to_random_nodes_jit(result_indices_buffer, - result_distances_buffer, - smem_desc, - graph_degree, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - visited_hash_bitlen, - local_traversed_hashmap_ptr, - traversed_hash_bitlen, - block_id, - num_blocks); + compute_distance_to_random_nodes_jit(result_indices_buffer, + result_distances_buffer, + smem_desc, + graph_degree, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + visited_hash_bitlen, + local_traversed_hashmap_ptr, + traversed_hash_bitlen, + block_id, + num_blocks); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -252,29 +224,20 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( __syncthreads(); // Compute the norms between child nodes and query node using JIT version - compute_distance_to_child_nodes_jit(result_indices_buffer, - result_distances_buffer, - smem_desc, - knn_graph, - graph_degree, - local_visited_hashmap_ptr, - visited_hash_bitlen, - local_traversed_hashmap_ptr, - traversed_hash_bitlen, - parent_indices_buffer, - result_indices_buffer, - 1, - result_position, - result_buffer_size_32); + compute_distance_to_child_nodes_jit(result_indices_buffer, + result_distances_buffer, + smem_desc, + knn_graph, + graph_degree, + local_visited_hashmap_ptr, + visited_hash_bitlen, + local_traversed_hashmap_ptr, + traversed_hash_bitlen, + parent_indices_buffer, + result_indices_buffer, + 1, + result_position, + result_buffer_size_32); __syncthreads(); // Check the state of the nodes in the result buffer which were not updated diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in index 1501a22ff5..776f9d9102 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_kernel.cu.in @@ -8,7 +8,7 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { -template __global__ void search_kernel_jit<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@>( +template __global__ void search_kernel_jit<@data_type@, @index_type@, @distance_type@, @source_index_type@>( @index_type@* const, @distance_type@* const, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, const @data_type@* const, const @index_type@* const, const std::uint32_t, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const std::uint32_t, const std::uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json index 929165330b..c798957de1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json @@ -64,16 +64,8 @@ "distance_abbrev": "f" } ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], + "team_size": ["32"], + "dataset_block_dim": ["256"], "_pq": [ { "pq_bits": "0", @@ -89,80 +81,5 @@ "codebook_comma": "" } ] - }, - { - "_data": [ - { - "data_type": "float", - 
"data_abbrev": "f" - }, - { - "data_type": "__half", - "data_abbrev": "h" - }, - { - "data_type": "uint8_t", - "data_abbrev": "uc" - }, - { - "data_type": "int8_t", - "data_abbrev": "sc" - } - ], - "_query": [ - { - "query_type": "half", - "query_abbrev": "h" - } - ], - "_source_index": [ - { - "source_index_type": "uint32_t", - "source_index_abbrev": "ui" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } - ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], - "_pq": [ - { - "pq_bits": "8", - "pq_len": "2", - "pq_prefix": "_vpq", - "pq_suffix": "_8pq_2subd" - }, - { - "pq_bits": "8", - "pq_len": "4", - "pq_prefix": "_vpq", - "pq_suffix": "_8pq_4subd" - } - ], - "_codebook": [ - { - "codebook_type": "half", - "codebook_tag": "tag_codebook_half", - "codebook_comma": ", " - } - ] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp index ab819d4bef..1ea392f43d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_planner.hpp @@ -33,31 +33,9 @@ struct CagraMultiCtaSearchPlanner uint32_t pq_bits = 0, uint32_t pq_len = 0) : CagraPlannerBase( - build_entrypoint_name(metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len), - is_vpq ? 
make_fragment_key() - : make_fragment_key()) + "search_multi_cta", make_fragment_key()) { } - - static std::string build_entrypoint_name(cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits, - uint32_t pq_len) - { - std::string name = "search_multi_cta"; - if (is_vpq) { name += "_vpq"; } - name += "_team_size_" + std::to_string(team_size); - name += "_dataset_block_dim_" + std::to_string(dataset_block_dim); - if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } - return name; - } }; } // namespace multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh index 4c44c6ee6a..dd22d8afe5 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh @@ -29,15 +29,7 @@ struct has_kpq_bits { template inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; -template +template RAFT_KERNEL random_pickup_kernel_jit( dataset_descriptor_base_t* dataset_desc, const DataT* const queries_ptr, // [num_queries, dataset_dim] @@ -65,25 +57,10 @@ RAFT_KERNEL random_pickup_kernel_jit( if (global_team_index >= num_pickup) { return; } extern __shared__ uint8_t smem[]; - // Set smem working buffer using unified setup_workspace - // setup_workspace copies the descriptor to shared memory and returns base pointer to smem - // descriptor NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - dataset_descriptor_base_t* smem_desc = - setup_workspace(dataset_desc, smem, queries_ptr, query_id); + // Set smem working buffer using descriptor->setup_workspace (JIT symbols patched by launcher) + auto* smem_desc = dataset_desc->setup_workspace(smem, queries_ptr, query_id); __syncthreads(); - // Load args once for better performance (avoid repeated loads in the 
loop) - using args_t = typename cuvs::neighbors::cagra::detail:: - dataset_descriptor_base_t::args_t; - args_t args = smem_desc->args.load(); IndexT dataset_size = smem_desc->size; INDEX_T best_index_team_local; @@ -97,22 +74,8 @@ RAFT_KERNEL random_pickup_kernel_jit( seed_index = device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_size; } - // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum - // Otherwise warp shuffles will hang. Each thread calls the unified extern function to get - // its per-thread distance, then team_sum reduces across all threads in the team. - DistanceT per_thread_norm2 = 0; - // Use unified compute_distance function (planner links standard or VPQ fragment at runtime) - per_thread_norm2 = compute_distance(args, seed_index); - // Now ALL threads in the team participate in team_sum - const auto norm2 = device::team_sum(per_thread_norm2, team_size_bits); + // Use descriptor->compute_distance (JIT symbols patched by launcher) + const auto norm2 = smem_desc->compute_distance(seed_index, true); if (norm2 < best_norm2_team_local) { best_norm2_team_local = norm2; @@ -133,15 +96,9 @@ RAFT_KERNEL random_pickup_kernel_jit( } } -template RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( @@ -175,19 +132,8 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( const auto query_id = blockIdx.y; extern __shared__ uint8_t smem[]; - // Load a query using unified setup_workspace - // setup_workspace copies the descriptor to shared memory and returns base pointer to smem - // descriptor NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - dataset_descriptor_base_t* smem_desc = - setup_workspace(dataset_desc, smem, query_ptr, query_id); + // Load a query using descriptor->setup_workspace (JIT symbols patched by launcher) + auto* smem_desc = dataset_desc->setup_workspace(smem, query_ptr, query_id); __syncthreads(); if (global_team_id >= search_width * graph_degree) { 
return; } @@ -213,29 +159,9 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( const auto compute_distance_flag = hashmap::insert( team_size, visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id); - // Load args once for better performance (avoid repeated loads) - using args_t = typename cuvs::neighbors::cagra::detail:: - dataset_descriptor_base_t::args_t; - args_t args = smem_desc->args.load(); - - // CRITICAL: ALL threads in the team must participate in compute_distance and team_sum - // Otherwise warp shuffles will hang. Each thread calls the unified extern function to get - // its per-thread distance, then team_sum reduces across all threads in the team. - DISTANCE_T per_thread_norm2 = 0; - if (compute_distance_flag) { - // Use unified compute_distance function (planner links standard or VPQ fragment at runtime) - per_thread_norm2 = compute_distance(args, child_id); - } - // Now ALL threads in the team participate in team_sum - DISTANCE_T norm2 = device::team_sum(per_thread_norm2, team_size_bits); + // All threads in the team must call compute_distance so team_sum doesn't deadlock (match non-JIT) + DISTANCE_T norm2 = + smem_desc->compute_distance(static_cast(child_id), compute_distance_flag); if (compute_distance_flag) { if ((threadIdx.x & (team_size - 1)) == 0) { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index 6d1ac46e35..efdc30d2bf 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -31,41 +31,12 @@ struct CagraMultiKernelSearchPlanner uint32_t pq_bits = 0, uint32_t pq_len = 0) : CagraPlannerBase( - build_entrypoint_name( - kernel_name, metric, team_size, dataset_block_dim, is_vpq, pq_bits, pq_len), - // Special case: apply_filter_kernel doesn't use DataTag, only IndexTag, DistanceTag, 
- // SourceIndexTag + kernel_name, (kernel_name == "apply_filter_kernel") ? make_fragment_key() - : (is_vpq - ? make_fragment_key() - : make_fragment_key())) + : make_fragment_key()) { } - - private: - static std::string build_entrypoint_name(const std::string& kernel_name, - cuvs::distance::DistanceType metric, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits, - uint32_t pq_len) - { - if (kernel_name == "apply_filter_kernel") { return kernel_name; } - - std::string name = kernel_name; - if (is_vpq) { name += "_vpq"; } - name += "_team_size_" + std::to_string(team_size); - name += "_dataset_block_dim_" + std::to_string(dataset_block_dim); - if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } - return name; - } }; } // namespace multi_kernel_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in index fbb3735454..285cadf441 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in @@ -17,8 +17,8 @@ __attribute__((__constructor__)) void register_kernel() tag_idx_@index_abbrev@, tag_dist_@distance_abbrev@, tag_@query_abbrev@, - tag_idx_@source_index_abbrev@@codebook_comma@ @codebook_tag@>( - "search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + tag_idx_@source_index_abbrev@>( + "search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh index 9d21c13d3f..99ab338151 100644 --- 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh @@ -54,21 +54,12 @@ using cuvs::neighbors::detail::sample_filter; using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit; using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; -// JIT version of search_core - uses dataset_descriptor_base_t* pointer -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT -// For standard descriptors: PQ_BITS=0, PQ_LEN=0, CodebookT=void, QueryT=float (or uint8_t for -// BitwiseHamming) For VPQ descriptors: PQ_BITS>0, PQ_LEN>0, CodebookT=half, QueryT=half +// JIT search_core - setup_workspace/compute_distance via descriptor template RAFT_DEVICE_INLINE_FUNCTION void search_core( uintptr_t result_indices_ptr, @@ -134,19 +125,7 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( uint32_t dim = dataset_desc->args.dim; uint32_t smem_ws_size_in_bytes = dataset_desc->smem_ws_size_in_bytes(); - // Set smem working buffer using unified setup_workspace - // setup_workspace copies the descriptor to shared memory and returns base pointer to smem - // descriptor NOTE: setup_workspace must be called by ALL threads (it uses __syncthreads()) - dataset_descriptor_base_t* smem_desc = - setup_workspace(dataset_desc, smem, queries_ptr, query_id); + auto* smem_desc = dataset_desc->setup_workspace(smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); @@ -184,26 +163,18 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( const IndexT* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; // Get dataset_size directly from base descriptor IndexT dataset_size = smem_desc->size; - compute_distance_to_random_nodes_jit(result_indices_buffer, - result_distances_buffer, - smem_desc, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen, - (IndexT*)nullptr, - 0); + compute_distance_to_random_nodes_jit(result_indices_buffer, + result_distances_buffer, + smem_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -311,26 +282,19 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( __syncthreads(); // compute the norms between child nodes and query node using JIT version _CLK_START(); - compute_distance_to_child_nodes_jit(result_indices_buffer + internal_topk, - result_distances_buffer + internal_topk, - smem_desc, - knn_graph, - graph_degree, - local_visited_hashmap_ptr, - hash_bitlen, - (IndexT*)nullptr, - 0, - parent_list_buffer, - result_indices_buffer, - search_width); + compute_distance_to_child_nodes_jit( + result_indices_buffer + internal_topk, + result_distances_buffer + internal_topk, + smem_desc, + knn_graph, + graph_degree, + local_visited_hashmap_ptr, + hash_bitlen, + (IndexT*)nullptr, + 0u, + parent_list_buffer, + result_indices_buffer, + search_width); // Critical: __syncthreads() must be reached by ALL threads // If any thread is stuck in compute_distance_to_child_nodes_jit, this will hang __syncthreads(); @@ -499,18 +463,11 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( } // JIT kernel wrapper - calls search_core -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT template RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_jit( uintptr_t result_indices_ptr, @@ -544,15 +501,9 @@ RAFT_KERNEL __launch_bounds__(1024, 1) 
search_kernel_jit( const auto query_id = blockIdx.y; search_core(result_indices_ptr, result_distances_ptr, top_k, @@ -592,19 +543,12 @@ struct job_desc_jit_helper_desc { using DISTANCE_T = DistanceT; }; -// JIT persistent kernel - uses extern functions and JIT search_core -// Unified template parameters: TeamSize, DatasetBlockDim, PQ_BITS, PQ_LEN, CodebookT, QueryT +// JIT persistent kernel template RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_p_jit( worker_handle_t* worker_handles, @@ -678,15 +622,9 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel_p_jit( // work phase - use JIT search_core search_core(result_indices_ptr, result_distances_ptr, top_k, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in index 2785ea46ac..6b86dcf72d 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_kernel.cu.in @@ -7,7 +7,7 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { -template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@>( +template __global__ __launch_bounds__(1024, 1) void search_kernel_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( uintptr_t, @distance_type@* const, const std::uint32_t, const @data_type@* const, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* 
const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json index d9f3e97653..4f2ad7d928 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json @@ -84,16 +84,8 @@ "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" } ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], + "team_size": ["32"], + "dataset_block_dim": ["256"], "_pq": [ { "pq_bits": "0", @@ -109,100 +101,5 @@ "codebook_comma": "" } ] - }, - { - "_data": [ - { - "data_type": "float", - "data_abbrev": "f" - }, - { - "data_type": "__half", - "data_abbrev": "h" - }, - { - "data_type": "uint8_t", - "data_abbrev": "uc" - }, - { - "data_type": "int8_t", - "data_abbrev": "sc" - } - ], - "_query": [ - { - "query_type": "half", - "query_abbrev": "h" - } - ], - "_source_index": [ - { - "source_index_type": "uint32_t", - "source_index_abbrev": "ui" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } - ], - "_topk_by_bitonic": [ - { - "topk_by_bitonic_sort": "true", - "topk_by_bitonic_sort_str": "topk_by_bitonic_sort" - }, - { - "topk_by_bitonic_sort": "false", - "topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort" - } - ], - "_bitonic_sort_and_merge_multi_warps": [ - { - "bitonic_sort_and_merge_multi_warps": "true", - "bitonic_sort_and_merge_multi_warps_str": 
"bitonic_sort_and_merge_multi_warps" - }, - { - "bitonic_sort_and_merge_multi_warps": "false", - "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" - } - ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], - "_pq": [ - { - "pq_bits": "8", - "pq_len": "2", - "pq_prefix": "", - "pq_suffix": "_8pq_2subd" - }, - { - "pq_bits": "8", - "pq_len": "4", - "pq_prefix": "", - "pq_suffix": "_8pq_4subd" - } - ], - "_codebook": [ - { - "codebook_type": "half", - "codebook_tag": "tag_codebook_half", - "codebook_comma": ", " - } - ] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in index 953a6d7cea..a9c97b7a11 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in @@ -17,8 +17,8 @@ __attribute__((__constructor__)) void register_kernel() tag_idx_@index_abbrev@, tag_dist_@distance_abbrev@, tag_@query_abbrev@, - tag_idx_@source_index_abbrev@@codebook_comma@ @codebook_tag@>( - "search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_team_size_@team_size@_dataset_block_dim_@dataset_block_dim@@pq_suffix@", + tag_idx_@source_index_abbrev@>( + "search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@", embedded_fatbin, sizeof(embedded_fatbin)); } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in index 96e4784101..1913f139dd 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_kernel.cu.in @@ -7,7 +7,7 @@ namespace 
cuvs::neighbors::cagra::detail::single_cta_search { -template __global__ void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@, @source_index_type@>( +template __global__ void search_kernel_p_jit<@topk_by_bitonic_sort@, @bitonic_sort_and_merge_multi_warps@, @data_type@, @index_type@, @distance_type@, @source_index_type@>( worker_handle_t*, job_desc_t>*, uint32_t*, const @index_type@* const, const std::uint32_t, const @source_index_type@*, const unsigned, const uint64_t, const @index_type@*, const uint32_t, @index_type@* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, std::uint32_t* const, const std::uint32_t, const std::uint32_t, const std::uint32_t, const std::uint32_t, cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, uint32_t*, @source_index_type@, @source_index_type@); } // namespace cuvs::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json index d9f3e97653..4f2ad7d928 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json @@ -84,16 +84,8 @@ "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" } ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], + "team_size": ["32"], + "dataset_block_dim": ["256"], "_pq": [ { "pq_bits": "0", @@ -109,100 +101,5 @@ "codebook_comma": "" } ] - }, - { - "_data": [ - { - "data_type": "float", - "data_abbrev": "f" - }, - { - "data_type": "__half", - "data_abbrev": "h" - }, - { 
- "data_type": "uint8_t", - "data_abbrev": "uc" - }, - { - "data_type": "int8_t", - "data_abbrev": "sc" - } - ], - "_query": [ - { - "query_type": "half", - "query_abbrev": "h" - } - ], - "_source_index": [ - { - "source_index_type": "uint32_t", - "source_index_abbrev": "ui" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } - ], - "_topk_by_bitonic": [ - { - "topk_by_bitonic_sort": "true", - "topk_by_bitonic_sort_str": "topk_by_bitonic_sort" - }, - { - "topk_by_bitonic_sort": "false", - "topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort" - } - ], - "_bitonic_sort_and_merge_multi_warps": [ - { - "bitonic_sort_and_merge_multi_warps": "true", - "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps" - }, - { - "bitonic_sort_and_merge_multi_warps": "false", - "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" - } - ], - "team_size": [ - "8", - "16", - "32" - ], - "dataset_block_dim": [ - "128", - "256", - "512" - ], - "_pq": [ - { - "pq_bits": "8", - "pq_len": "2", - "pq_prefix": "", - "pq_suffix": "_8pq_2subd" - }, - { - "pq_bits": "8", - "pq_len": "4", - "pq_prefix": "", - "pq_suffix": "_8pq_4subd" - } - ], - "_codebook": [ - { - "codebook_type": "half", - "codebook_tag": "tag_codebook_half", - "codebook_comma": ", " - } - ] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index 24da6d42ad..57f3190ff9 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -35,43 +35,20 @@ struct CagraSingleCtaSearchPlanner uint32_t pq_len = 0, bool persistent = false) : CagraPlannerBase( - build_entrypoint_name(metric, - topk_by_bitonic_sort, - 
bitonic_sort_and_merge_multi_warps, - team_size, - dataset_block_dim, - is_vpq, - pq_bits, - pq_len, - persistent), - is_vpq ? make_fragment_key() - : make_fragment_key()) + build_entrypoint_name(topk_by_bitonic_sort, bitonic_sort_and_merge_multi_warps, persistent), + make_fragment_key()) { } private: - static std::string build_entrypoint_name(cuvs::distance::DistanceType metric, - bool topk_by_bitonic_sort, + static std::string build_entrypoint_name(bool topk_by_bitonic_sort, bool bitonic_sort_and_merge_multi_warps, - uint32_t team_size, - uint32_t dataset_block_dim, - bool is_vpq, - uint32_t pq_bits, - uint32_t pq_len, bool persistent) { std::string name = (persistent ? "search_single_cta_p" : "search_single_cta"); - name += std::string(topk_by_bitonic_sort ? "_" : "_no_") + "topk_by_bitonic_sort"; - name += std::string(bitonic_sort_and_merge_multi_warps ? "_" : "_no_") + - "bitonic_sort_and_merge_multi_warps"; - name += "_team_size_" + std::to_string(team_size); - name += "_dataset_block_dim_" + std::to_string(dataset_block_dim); - if (is_vpq) { name += "_" + std::to_string(pq_bits) + "pq_" + std::to_string(pq_len) + "subd"; } + name += (topk_by_bitonic_sort ? "_" : "_no_") + std::string("topk_by_bitonic_sort"); + name += (bitonic_sort_and_merge_multi_warps ? 
"_" : "_no_") + + std::string("bitonic_sort_and_merge_multi_warps"); return name; } }; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in index 7872328efb..c399632d00 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in @@ -11,3 +11,9 @@ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@d cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); } // namespace cuvs::neighbors::cagra::detail + +extern "C" { +__device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* (*setup_workspace_ptr)( + cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t) = + &cuvs::neighbors::cagra::detail::setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; +} diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index a18e44df51..434e75ded2 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -91,8 +92,7 @@ void select_and_run_jit( using DistTag = decltype(get_distance_type_tag()); using SourceTag = decltype(get_source_index_type_tag()); - // Create planner and register device functions - // Pass team_size, dataset_block_dim, and VPQ parameters to match the kernel entrypoint name + // Create planner and register device functions 
(setup_workspace, compute_distance, etc.) std::shared_ptr launcher; if (dataset_desc.is_vpq) { using QueryTag = query_type_tag_vpq_t; @@ -206,6 +206,33 @@ void select_and_run_jit( dataset_desc.dev_ptr(stream); const auto* dev_desc = dev_desc_base; + // Patch descriptor with JIT symbols (setup_workspace_ptr, compute_distance_ptr) + using dev_descriptor_t = + cuvs::neighbors::cagra::detail::dataset_descriptor_base_t; + auto library = launcher->get_library(); + size_t ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); + void* setup_workspace_ptr_addr = nullptr; + RAFT_CUDA_TRY( + cudaLibraryGetGlobal(&setup_workspace_ptr_addr, &ptr_size, library, "setup_workspace_ptr")); + std::uintptr_t dev_desc_setup_impl_addr = + reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), + setup_workspace_ptr_addr, + sizeof(typename dev_descriptor_t::setup_workspace_type*), + cudaMemcpyDeviceToDevice, + stream)); + ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); + void* compute_distance_ptr_addr = nullptr; + RAFT_CUDA_TRY( + cudaLibraryGetGlobal(&compute_distance_ptr_addr, &ptr_size, library, "compute_distance_ptr")); + std::uintptr_t dev_desc_compute_dist_impl_addr = + reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, compute_distance_impl); + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), + compute_distance_ptr_addr, + sizeof(typename dev_descriptor_t::compute_distance_type*), + cudaMemcpyDeviceToDevice, + stream)); + // Note: dataset_desc is passed by const reference, so it stays alive for the duration of this // function The descriptor's state is managed by a shared_ptr internally, so no need to explicitly // keep it alive diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh index c3f13d07c3..dc0882114e 
100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -141,6 +142,33 @@ void random_pickup_jit(const dataset_descriptor_host& // Get the device descriptor pointer const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); + // Patch descriptor with JIT symbols (setup_workspace_ptr, compute_distance_ptr) + using dev_descriptor_t = + cuvs::neighbors::cagra::detail::dataset_descriptor_base_t; + auto library = launcher->get_library(); + size_t ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); + void* setup_workspace_ptr_addr = nullptr; + RAFT_CUDA_TRY( + cudaLibraryGetGlobal(&setup_workspace_ptr_addr, &ptr_size, library, "setup_workspace_ptr")); + std::uintptr_t dev_desc_setup_impl_addr = + reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), + setup_workspace_ptr_addr, + sizeof(typename dev_descriptor_t::setup_workspace_type*), + cudaMemcpyDeviceToDevice, + cuda_stream)); + ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); + void* compute_distance_ptr_addr = nullptr; + RAFT_CUDA_TRY( + cudaLibraryGetGlobal(&compute_distance_ptr_addr, &ptr_size, library, "compute_distance_ptr")); + std::uintptr_t dev_desc_compute_dist_impl_addr = + reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, compute_distance_impl); + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), + compute_distance_ptr_addr, + sizeof(typename dev_descriptor_t::compute_distance_type*), + cudaMemcpyDeviceToDevice, + cuda_stream)); + // Cast size_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly const uint32_t ldr_u32 = static_cast(ldr); @@ -284,6 +312,33 @@ void 
compute_distance_to_child_nodes_jit( // Get the device descriptor pointer const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); + // Patch descriptor with JIT symbols (setup_workspace_ptr, compute_distance_ptr) + using dev_descriptor_t = + cuvs::neighbors::cagra::detail::dataset_descriptor_base_t; + auto library = launcher->get_library(); + size_t ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); + void* setup_workspace_ptr_addr = nullptr; + RAFT_CUDA_TRY( + cudaLibraryGetGlobal(&setup_workspace_ptr_addr, &ptr_size, library, "setup_workspace_ptr")); + std::uintptr_t dev_desc_setup_impl_addr = + reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), + setup_workspace_ptr_addr, + sizeof(typename dev_descriptor_t::setup_workspace_type*), + cudaMemcpyDeviceToDevice, + cuda_stream)); + ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); + void* compute_distance_ptr_addr = nullptr; + RAFT_CUDA_TRY( + cudaLibraryGetGlobal(&compute_distance_ptr_addr, &ptr_size, library, "compute_distance_ptr")); + std::uintptr_t dev_desc_compute_dist_impl_addr = + reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, compute_distance_impl); + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), + compute_distance_ptr_addr, + sizeof(typename dev_descriptor_t::compute_distance_type*), + cudaMemcpyDeviceToDevice, + cuda_stream)); + // Dispatch kernel via launcher launcher->dispatch(cuda_stream, grid_size, diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 336bff5d81..8554b3bfad 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -35,6 +35,7 @@ #include #include +#include #include #include #include 
@@ -434,6 +435,34 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn // Get the device descriptor pointer - kernel will use the concrete type from template const auto* dev_desc = dataset_desc.get().dev_ptr(stream); + // Patch descriptor with JIT symbols (setup_workspace_ptr, compute_distance_ptr) + using dev_descriptor_t = cuvs::neighbors::cagra::detail:: + dataset_descriptor_base_t; + auto library = launcher->get_library(); + size_t ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); + void* setup_workspace_ptr_addr = nullptr; + RAFT_CUDA_TRY( + cudaLibraryGetGlobal(&setup_workspace_ptr_addr, &ptr_size, library, "setup_workspace_ptr")); + std::uintptr_t dev_desc_setup_impl_addr = + reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), + setup_workspace_ptr_addr, + sizeof(typename dev_descriptor_t::setup_workspace_type*), + cudaMemcpyDeviceToDevice, + stream)); + ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); + void* compute_distance_ptr_addr = nullptr; + RAFT_CUDA_TRY( + cudaLibraryGetGlobal(&compute_distance_ptr_addr, &ptr_size, library, "compute_distance_ptr")); + std::uintptr_t dev_desc_compute_dist_impl_addr = + reinterpret_cast(dev_desc) + + offsetof(dev_descriptor_t, compute_distance_impl); + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), + compute_distance_ptr_addr, + sizeof(typename dev_descriptor_t::compute_distance_type*), + cudaMemcpyDeviceToDevice, + stream)); + // Cast size_t/int64_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly const uint32_t graph_degree_u32 = static_cast(graph.extent(1)); @@ -858,6 +887,34 @@ void select_and_run_jit( num_queries, smem_size); + auto library = launcher->get_library(); + using dev_descriptor_t = + 
cuvs::neighbors::cagra::detail::dataset_descriptor_base_t; + size_t setup_workspace_ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); + void* setup_workspace_ptr_addr = nullptr; // device address of the global "setup_workspace_ptr" + RAFT_CUDA_TRY(cudaLibraryGetGlobal( + &setup_workspace_ptr_addr, &setup_workspace_ptr_size, library, "setup_workspace_ptr")); + std::uintptr_t dev_desc_setup_impl_addr = + reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); + // One copy each: device global -> descriptor field (all in device memory) + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), + setup_workspace_ptr_addr, + sizeof(typename dev_descriptor_t::setup_workspace_type*), + cudaMemcpyDeviceToDevice, + stream)); + size_t compute_distance_ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); + void* compute_distance_ptr_addr = nullptr; + RAFT_CUDA_TRY(cudaLibraryGetGlobal( + &compute_distance_ptr_addr, &compute_distance_ptr_size, library, "compute_distance_ptr")); + std::uintptr_t dev_desc_compute_dist_impl_addr = + reinterpret_cast(dev_desc) + + offsetof(dev_descriptor_t, compute_distance_impl); + RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), + compute_distance_ptr_addr, + sizeof(typename dev_descriptor_t::compute_distance_type*), + cudaMemcpyDeviceToDevice, + stream)); + // Dispatch kernel via launcher auto kernel_launcher = [&](auto const& kernel) -> void { launcher->dispatch( From 9243390ada762934c4acdec175ee4ed4f66138f3 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 12 Mar 2026 17:25:04 +0000 Subject: [PATCH 153/158] const --- .../detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in | 2 +- .../detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in index 8be736b753..f926b67200 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in @@ -14,6 +14,6 @@ template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block } // namespace cuvs::neighbors::cagra::detail extern "C" { -__device__ @distance_type@ (*compute_distance_ptr)(const typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t, @index_type@) = +__device__ @distance_type@ (*const compute_distance_ptr)(const typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t, @index_type@) = &cuvs::neighbors::cagra::detail::compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in index c399632d00..41f6ecec38 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in @@ -13,7 +13,7 @@ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@d } // namespace cuvs::neighbors::cagra::detail extern "C" { -__device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* (*setup_workspace_ptr)( +__device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* (*const setup_workspace_ptr)( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t) = 
&cuvs::neighbors::cagra::detail::setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; } From dca579ab1418bb9cb73a3e7f7886c8b1d6fa71ca Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 12 Mar 2026 18:11:52 +0000 Subject: [PATCH 154/158] extern --- .../detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in | 2 +- .../detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in index f926b67200..6cc917ee0c 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in @@ -14,6 +14,6 @@ template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block } // namespace cuvs::neighbors::cagra::detail extern "C" { -__device__ @distance_type@ (*const compute_distance_ptr)(const typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t, @index_type@) = +extern __device__ @distance_type@ (*const compute_distance_ptr)(const typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t, @index_type@) = &cuvs::neighbors::cagra::detail::compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; } diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in index 41f6ecec38..91081b0931 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in +++ 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in @@ -13,7 +13,7 @@ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@d } // namespace cuvs::neighbors::cagra::detail extern "C" { -__device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* (*const setup_workspace_ptr)( +extern __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* (*const setup_workspace_ptr)( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t) = &cuvs::neighbors::cagra::detail::setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; } From 1c2da370a2cfa118853b014f0cdd1a7f868c0984 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 12 Mar 2026 19:52:53 +0000 Subject: [PATCH 155/158] Re-run CI From ff3527b222296f2631b2057531c7c69639b8a154 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 12 Mar 2026 21:32:10 +0000 Subject: [PATCH 156/158] fix bug and simplify json --- cpp/CMakeLists.txt | 10 +- ...te_distance_to_child_nodes_embedded.cpp.in | 1 - ...ompute_distance_to_child_nodes_matrix.json | 88 ++------------- .../random_pickup_embedded.cpp.in | 1 - .../jit_lto_kernels/random_pickup_matrix.json | 88 ++------------- .../jit_lto_kernels/search_multi_cta_jit.cuh | 1 - .../search_multi_cta_matrix.json | 88 ++------------- .../jit_lto_kernels/search_multi_jit.cuh | 12 -- .../search_multi_kernel_planner.hpp | 2 +- .../search_single_cta_embedded.cpp.in | 1 - .../jit_lto_kernels/search_single_cta_jit.cuh | 4 - .../search_single_cta_matrix.json | 106 ++---------------- .../search_single_cta_p_embedded.cpp.in | 1 - .../search_single_cta_p_matrix.json | 106 ++---------------- .../search_single_cta_planner.hpp | 2 +- 15 files changed, 53 insertions(+), 458 
deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cc5590036a..06d3f99ac3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -497,7 +497,7 @@ if(NOT BUILD_CPU_ONLY) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_data_@data_abbrev@_query_@query_abbrev@" + "cagra_search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_data_@data_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json" KERNEL_INPUT_FILE @@ -510,7 +510,7 @@ if(NOT BUILD_CPU_ONLY) generate_jit_lto_kernels( jit_lto_kernel_files NAME_FORMAT - "cagra_search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_data_@data_abbrev@_query_@query_abbrev@" + "cagra_search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@_data_@data_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json" KERNEL_INPUT_FILE @@ -522,7 +522,7 @@ if(NOT BUILD_CPU_ONLY) ) generate_jit_lto_kernels( jit_lto_kernel_files - NAME_FORMAT "cagra_search_multi_cta_data_@data_abbrev@_query_@query_abbrev@" + NAME_FORMAT "cagra_search_multi_cta_data_@data_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json" KERNEL_INPUT_FILE @@ -534,7 +534,7 @@ if(NOT BUILD_CPU_ONLY) ) generate_jit_lto_kernels( jit_lto_kernel_files - NAME_FORMAT "cagra_random_pickup_data_@data_abbrev@_query_@query_abbrev@" + NAME_FORMAT "cagra_random_pickup_data_@data_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json" KERNEL_INPUT_FILE @@ -546,7 +546,7 @@ if(NOT BUILD_CPU_ONLY) ) generate_jit_lto_kernels( jit_lto_kernel_files - NAME_FORMAT 
"cagra_compute_distance_to_child_nodes_data_@data_abbrev@_query_@query_abbrev@" + NAME_FORMAT "cagra_compute_distance_to_child_nodes_data_@data_abbrev@" MATRIX_JSON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json" KERNEL_INPUT_FILE diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in index efcaca8b32..f0c93715dc 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_embedded.cpp.in @@ -16,7 +16,6 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( "compute_distance_to_child_nodes", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json index c798957de1..f934f26c11 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_to_child_nodes_matrix.json @@ -1,85 +1,13 @@ [ { "_data": [ - { - "data_type": "float", - "data_abbrev": "f", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - }, - { - "data_type": "__half", - "data_abbrev": "h", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - }, - { - "data_type": "uint8_t", - "data_abbrev": "uc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - }, - { - "query_type": "uint8_t", - "query_abbrev": "uc" - } - ] - }, - { - "data_type": "int8_t", - "data_abbrev": "sc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - } - ], - "_source_index": [ - { - "source_index_type": "uint32_t", - 
"source_index_abbrev": "ui" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } - ], - "team_size": ["32"], - "dataset_block_dim": ["256"], - "_pq": [ - { - "pq_bits": "0", - "pq_len": "0", - "pq_prefix": "", - "pq_suffix": "" - } - ], - "_codebook": [ - { - "codebook_type": "void", - "codebook_tag": "", - "codebook_comma": "" - } - ] + {"data_type": "float", "data_abbrev": "f"}, + {"data_type": "__half", "data_abbrev": "h"}, + {"data_type": "uint8_t", "data_abbrev": "uc"}, + {"data_type": "int8_t", "data_abbrev": "sc"} + ], + "_source_index": [{"source_index_type": "uint32_t", "source_index_abbrev": "ui"}], + "_index": [{"index_type": "uint32_t", "index_abbrev": "ui"}], + "_distance": [{"distance_type": "float", "distance_abbrev": "f"}] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in index 70783711ee..9d0c73d8b8 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_embedded.cpp.in @@ -16,7 +16,6 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( "random_pickup", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json index c798957de1..f934f26c11 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/random_pickup_matrix.json @@ -1,85 +1,13 @@ [ { "_data": [ - { - "data_type": "float", - "data_abbrev": "f", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - }, - { - "data_type": "__half", - "data_abbrev": "h", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } 
- ] - }, - { - "data_type": "uint8_t", - "data_abbrev": "uc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - }, - { - "query_type": "uint8_t", - "query_abbrev": "uc" - } - ] - }, - { - "data_type": "int8_t", - "data_abbrev": "sc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - } - ], - "_source_index": [ - { - "source_index_type": "uint32_t", - "source_index_abbrev": "ui" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } - ], - "team_size": ["32"], - "dataset_block_dim": ["256"], - "_pq": [ - { - "pq_bits": "0", - "pq_len": "0", - "pq_prefix": "", - "pq_suffix": "" - } - ], - "_codebook": [ - { - "codebook_type": "void", - "codebook_tag": "", - "codebook_comma": "" - } - ] + {"data_type": "float", "data_abbrev": "f"}, + {"data_type": "__half", "data_abbrev": "h"}, + {"data_type": "uint8_t", "data_abbrev": "uc"}, + {"data_type": "int8_t", "data_abbrev": "sc"} + ], + "_source_index": [{"source_index_type": "uint32_t", "source_index_abbrev": "ui"}], + "_index": [{"index_type": "uint32_t", "index_abbrev": "ui"}], + "_distance": [{"distance_type": "float", "distance_abbrev": "f"}] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh index ca4c9438d5..d1f2e01fb4 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh @@ -26,7 +26,6 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit; using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; -using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v; using cuvs::neighbors::detail::sample_filter; template __global__ 
__launch_bounds__(1024, 1) void search_kernel_jit( diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json index c798957de1..f934f26c11 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_matrix.json @@ -1,85 +1,13 @@ [ { "_data": [ - { - "data_type": "float", - "data_abbrev": "f", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - }, - { - "data_type": "__half", - "data_abbrev": "h", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - }, - { - "data_type": "uint8_t", - "data_abbrev": "uc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - }, - { - "query_type": "uint8_t", - "query_abbrev": "uc" - } - ] - }, - { - "data_type": "int8_t", - "data_abbrev": "sc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - } - ], - "_source_index": [ - { - "source_index_type": "uint32_t", - "source_index_abbrev": "ui" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } - ], - "team_size": ["32"], - "dataset_block_dim": ["256"], - "_pq": [ - { - "pq_bits": "0", - "pq_len": "0", - "pq_prefix": "", - "pq_suffix": "" - } - ], - "_codebook": [ - { - "codebook_type": "void", - "codebook_tag": "", - "codebook_comma": "" - } - ] + {"data_type": "float", "data_abbrev": "f"}, + {"data_type": "__half", "data_abbrev": "h"}, + {"data_type": "uint8_t", "data_abbrev": "uc"}, + {"data_type": "int8_t", "data_abbrev": "sc"} + ], + "_source_index": [{"source_index_type": "uint32_t", "source_index_abbrev": "ui"}], + "_index": [{"index_type": "uint32_t", "index_abbrev": "ui"}], + "_distance": [{"distance_type": "float", "distance_abbrev": "f"}] } ] diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh index dd22d8afe5..40ac504eca 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh @@ -17,18 +17,6 @@ namespace cuvs::neighbors::cagra::detail::multi_kernel_search { -template -struct has_kpq_bits { - template - static auto test(int) -> decltype(U::kPqBits, std::true_type{}); - template - static std::false_type test(...); - static constexpr bool value = decltype(test(0))::value; -}; - -template -inline constexpr bool has_kpq_bits_v = has_kpq_bits::value; - template RAFT_KERNEL random_pickup_kernel_jit( dataset_descriptor_base_t* dataset_desc, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp index efdc30d2bf..666c5985de 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_kernel_planner.hpp @@ -34,7 +34,7 @@ struct CagraMultiKernelSearchPlanner kernel_name, (kernel_name == "apply_filter_kernel") ? 
make_fragment_key() - : make_fragment_key()) + : make_fragment_key()) { } }; diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in index 285cadf441..510210c401 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_embedded.cpp.in @@ -16,7 +16,6 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( "search_single_cta_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh index 99ab338151..c04bf30928 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh @@ -38,10 +38,6 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { -// Helper to check if DescriptorT has kPqBits (VPQ descriptor) - use shared version -// Use fully qualified name since it's a template variable -using cuvs::neighbors::cagra::detail::device::has_kpq_bits_v; - // are defined in search_single_cta_kernel-inl.cuh which is included by the launcher. // We don't redefine them here to avoid duplicate definitions. 
diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json index 4f2ad7d928..a536af418e 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_matrix.json @@ -1,105 +1,21 @@ [ { "_data": [ - { - "data_type": "float", - "data_abbrev": "f", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - }, - { - "data_type": "__half", - "data_abbrev": "h", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - }, - { - "data_type": "uint8_t", - "data_abbrev": "uc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - }, - { - "query_type": "uint8_t", - "query_abbrev": "uc" - } - ] - }, - { - "data_type": "int8_t", - "data_abbrev": "sc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - } - ], - "_source_index": [ - { - "source_index_type": "uint32_t", - "source_index_abbrev": "ui" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } + {"data_type": "float", "data_abbrev": "f"}, + {"data_type": "__half", "data_abbrev": "h"}, + {"data_type": "uint8_t", "data_abbrev": "uc"}, + {"data_type": "int8_t", "data_abbrev": "sc"} ], + "_source_index": [{"source_index_type": "uint32_t", "source_index_abbrev": "ui"}], + "_index": [{"index_type": "uint32_t", "index_abbrev": "ui"}], + "_distance": [{"distance_type": "float", "distance_abbrev": "f"}], "_topk_by_bitonic": [ - { - "topk_by_bitonic_sort": "true", - "topk_by_bitonic_sort_str": "topk_by_bitonic_sort" - }, - { - "topk_by_bitonic_sort": "false", - "topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort" - } + {"topk_by_bitonic_sort": "true", "topk_by_bitonic_sort_str": "topk_by_bitonic_sort"}, + {"topk_by_bitonic_sort": "false", 
"topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort"} ], "_bitonic_sort_and_merge_multi_warps": [ - { - "bitonic_sort_and_merge_multi_warps": "true", - "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps" - }, - { - "bitonic_sort_and_merge_multi_warps": "false", - "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" - } - ], - "team_size": ["32"], - "dataset_block_dim": ["256"], - "_pq": [ - { - "pq_bits": "0", - "pq_len": "0", - "pq_prefix": "", - "pq_suffix": "" - } - ], - "_codebook": [ - { - "codebook_type": "void", - "codebook_tag": "", - "codebook_comma": "" - } + {"bitonic_sort_and_merge_multi_warps": "true", "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps"}, + {"bitonic_sort_and_merge_multi_warps": "false", "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps"} ] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in index a9c97b7a11..279b08b99c 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_embedded.cpp.in @@ -16,7 +16,6 @@ __attribute__((__constructor__)) void register_kernel() registerAlgorithm( "search_single_cta_p_@topk_by_bitonic_sort_str@_@bitonic_sort_and_merge_multi_warps_str@", embedded_fatbin, diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json index 4f2ad7d928..a536af418e 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_p_matrix.json @@ -1,105 +1,21 @@ [ { "_data": [ - { - "data_type": "float", - "data_abbrev": "f", - "_query": [ - { - "query_type": 
"float", - "query_abbrev": "f" - } - ] - }, - { - "data_type": "__half", - "data_abbrev": "h", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - }, - { - "data_type": "uint8_t", - "data_abbrev": "uc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - }, - { - "query_type": "uint8_t", - "query_abbrev": "uc" - } - ] - }, - { - "data_type": "int8_t", - "data_abbrev": "sc", - "_query": [ - { - "query_type": "float", - "query_abbrev": "f" - } - ] - } - ], - "_source_index": [ - { - "source_index_type": "uint32_t", - "source_index_abbrev": "ui" - } - ], - "_index": [ - { - "index_type": "uint32_t", - "index_abbrev": "ui" - } - ], - "_distance": [ - { - "distance_type": "float", - "distance_abbrev": "f" - } + {"data_type": "float", "data_abbrev": "f"}, + {"data_type": "__half", "data_abbrev": "h"}, + {"data_type": "uint8_t", "data_abbrev": "uc"}, + {"data_type": "int8_t", "data_abbrev": "sc"} ], + "_source_index": [{"source_index_type": "uint32_t", "source_index_abbrev": "ui"}], + "_index": [{"index_type": "uint32_t", "index_abbrev": "ui"}], + "_distance": [{"distance_type": "float", "distance_abbrev": "f"}], "_topk_by_bitonic": [ - { - "topk_by_bitonic_sort": "true", - "topk_by_bitonic_sort_str": "topk_by_bitonic_sort" - }, - { - "topk_by_bitonic_sort": "false", - "topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort" - } + {"topk_by_bitonic_sort": "true", "topk_by_bitonic_sort_str": "topk_by_bitonic_sort"}, + {"topk_by_bitonic_sort": "false", "topk_by_bitonic_sort_str": "no_topk_by_bitonic_sort"} ], "_bitonic_sort_and_merge_multi_warps": [ - { - "bitonic_sort_and_merge_multi_warps": "true", - "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps" - }, - { - "bitonic_sort_and_merge_multi_warps": "false", - "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps" - } - ], - "team_size": ["32"], - "dataset_block_dim": ["256"], - "_pq": [ - { - "pq_bits": "0", - "pq_len": "0", - 
"pq_prefix": "", - "pq_suffix": "" - } - ], - "_codebook": [ - { - "codebook_type": "void", - "codebook_tag": "", - "codebook_comma": "" - } + {"bitonic_sort_and_merge_multi_warps": "true", "bitonic_sort_and_merge_multi_warps_str": "bitonic_sort_and_merge_multi_warps"}, + {"bitonic_sort_and_merge_multi_warps": "false", "bitonic_sort_and_merge_multi_warps_str": "no_bitonic_sort_and_merge_multi_warps"} ] } ] diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp index 57f3190ff9..287e770592 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_planner.hpp @@ -36,7 +36,7 @@ struct CagraSingleCtaSearchPlanner bool persistent = false) : CagraPlannerBase( build_entrypoint_name(topk_by_bitonic_sort, bitonic_sort_and_merge_multi_warps, persistent), - make_fragment_key()) + make_fragment_key()) { } From e14a1193c4c3a1f4487f2ccb41a6f3f1fdf24ef1 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 13 Mar 2026 04:53:04 +0000 Subject: [PATCH 157/158] simply function ptr usage --- .../compute_distance_kernel.cu.in | 10 ++-- .../jit_lto_kernels/device_common_jit.cuh | 31 ++++++---- .../extern_device_functions.cuh | 8 +++ .../jit_lto_kernels/search_multi_cta_jit.cuh | 4 +- .../jit_lto_kernels/search_multi_jit.cuh | 25 ++++++--- .../jit_lto_kernels/search_single_cta_jit.cuh | 6 +- .../setup_workspace_kernel.cu.in | 10 ++-- .../search_multi_cta_kernel_launcher_jit.cuh | 27 --------- .../search_multi_kernel_launcher_jit.cuh | 54 ------------------ .../search_single_cta_kernel_launcher_jit.cuh | 56 ------------------- 10 files changed, 59 insertions(+), 172 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in index 6cc917ee0c..56144d1d7f 
100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in @@ -4,6 +4,7 @@ */ #include +#include namespace cuvs::neighbors::cagra::detail { @@ -11,9 +12,8 @@ using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_ template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( const args_t, @index_type@); -} // namespace cuvs::neighbors::cagra::detail - -extern "C" { -extern __device__ @distance_type@ (*const compute_distance_ptr)(const typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t, @index_type@) = +template<> +__device__ @distance_type@ (*compute_distance_ptr<@data_type@, @index_type@, @distance_type@>)(const typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t, @index_type@) = &cuvs::neighbors::cagra::detail::compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; -} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh index f89eaefc27..f074a6d909 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh @@ -74,7 +74,13 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( } } - const auto norm2 = smem_desc->compute_distance(seed_index, valid_i); + const auto args_load = smem_desc->args.load(); + const auto team_bits = smem_desc->team_size_bitshift_from_smem(); + auto per_thread_distances = + valid_i ? 
(*cuvs::neighbors::cagra::detail:: + compute_distance_ptr)(args_load, seed_index) + : 0; + const auto norm2 = device::team_sum(per_thread_distances, team_bits); if (valid_i && (norm2 < best_norm2_team_local)) { best_norm2_team_local = norm2; @@ -155,23 +161,24 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( // Compute the distance to child nodes - same inline pattern as non-JIT (device_common.hpp) constexpr unsigned warp_size = 32; - const auto team_size_bits = smem_desc->team_size_bitshift_from_smem(); - const auto num_k = knn_k * search_width; - const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bits); - const auto compute_distance = smem_desc->compute_distance_impl; - const auto args = smem_desc->args.load(); - const bool lead_lane = (threadIdx.x & ((1u << team_size_bits) - 1u)) == 0; - const uint32_t ofst = STATIC_RESULT_POSITION ? 0 : result_position[0]; + const auto team_size_bits = smem_desc->team_size_bitshift_from_smem(); + const auto num_k = knn_k * search_width; + const auto max_i = raft::round_up_safe(num_k, warp_size >> team_size_bits); + const auto args = smem_desc->args.load(); + const bool lead_lane = (threadIdx.x & ((1u << team_size_bits) - 1u)) == 0; + const uint32_t ofst = STATIC_RESULT_POSITION ? 0 : result_position[0]; for (uint32_t i = threadIdx.x >> team_size_bits; i < max_i; i += blockDim.x >> team_size_bits) { const auto j = i + ofst; const bool valid_i = STATIC_RESULT_POSITION ? (j < num_k) : (j < max_result_position); const auto child_id = valid_i ? result_child_indices_ptr[j] : invalid_index; - const DistanceT child_dist = device::team_sum( - (child_id != invalid_index) ? compute_distance(args, child_id) - : (lead_lane ? raft::upper_bound() : 0), - team_size_bits); + auto per_thread_distances = + (child_id != invalid_index) + ? (*cuvs::neighbors::cagra::detail:: + compute_distance_ptr)(args, child_id) + : (lead_lane ? 
raft::upper_bound() : 0); + const DistanceT child_dist = device::team_sum(per_thread_distances, team_size_bits); __syncwarp(); // Store the distance diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh index 81f5d56f23..253a9f8ff1 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -25,6 +25,10 @@ extern __device__ dataset_descriptor_base_t* setup_wor const DataT* queries, uint32_t query_id); +template +extern __device__ dataset_descriptor_base_t* (*setup_workspace_ptr)( + dataset_descriptor_base_t*, void*, const DataT*, uint32_t); + template ::args_t args, IndexT dataset_index); + +template +extern __device__ DistanceT (*compute_distance_ptr)( + const typename dataset_descriptor_base_t::args_t, IndexT); } // namespace cuvs::neighbors::cagra::detail namespace cuvs::neighbors::detail { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh index d1f2e01fb4..a84f010449 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh @@ -97,8 +97,8 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( uint32_t dim = dataset_desc->args.dim; uint32_t smem_ws_size_in_bytes = dataset_desc->smem_ws_size_in_bytes(); - // Set smem working buffer using descriptor->setup_workspace (JIT symbols patched by launcher) - auto* smem_desc = dataset_desc->setup_workspace(smem, queries_ptr, query_id); + auto* smem_desc = + (*setup_workspace_ptr)(dataset_desc, smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); diff --git 
a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh index 40ac504eca..d5c3c16b0c 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh @@ -5,6 +5,7 @@ #pragma once +#include "../device_common.hpp" #include "../hashmap.hpp" #include "../utils.hpp" @@ -45,8 +46,8 @@ RAFT_KERNEL random_pickup_kernel_jit( if (global_team_index >= num_pickup) { return; } extern __shared__ uint8_t smem[]; - // Set smem working buffer using descriptor->setup_workspace (JIT symbols patched by launcher) - auto* smem_desc = dataset_desc->setup_workspace(smem, queries_ptr, query_id); + auto* smem_desc = + (*setup_workspace_ptr)(dataset_desc, smem, queries_ptr, query_id); __syncthreads(); IndexT dataset_size = smem_desc->size; @@ -62,8 +63,11 @@ RAFT_KERNEL random_pickup_kernel_jit( seed_index = device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_size; } - // Use descriptor->compute_distance (JIT symbols patched by launcher) - const auto norm2 = smem_desc->compute_distance(seed_index, true); + const auto args_load = smem_desc->args.load(); + const auto team_bits = smem_desc->team_size_bitshift_from_smem(); + auto per_thread_distances = + (*compute_distance_ptr)(args_load, seed_index); + const auto norm2 = device::team_sum(per_thread_distances, team_bits); if (norm2 < best_norm2_team_local) { best_norm2_team_local = norm2; @@ -120,8 +124,8 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( const auto query_id = blockIdx.y; extern __shared__ uint8_t smem[]; - // Load a query using descriptor->setup_workspace (JIT symbols patched by launcher) - auto* smem_desc = dataset_desc->setup_workspace(smem, query_ptr, query_id); + auto* smem_desc = + (*setup_workspace_ptr)(dataset_desc, smem, query_ptr, query_id); __syncthreads(); if (global_team_id >= search_width * graph_degree) { 
return; } @@ -147,9 +151,12 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( const auto compute_distance_flag = hashmap::insert( team_size, visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id); - // All threads in the team must call compute_distance so team_sum doesn't deadlock (match non-JIT) - DISTANCE_T norm2 = - smem_desc->compute_distance(static_cast(child_id), compute_distance_flag); + const auto args = smem_desc->args.load(); + auto per_thread_distances = + compute_distance_flag + ? (*compute_distance_ptr)(args, static_cast(child_id)) + : 0; + DISTANCE_T norm2 = device::team_sum(per_thread_distances, team_size_bits); if (compute_distance_flag) { if ((threadIdx.x & (team_size - 1)) == 0) { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh index c04bf30928..eafc5987f8 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh @@ -50,7 +50,7 @@ using cuvs::neighbors::detail::sample_filter; using cuvs::neighbors::cagra::detail::device::compute_distance_to_child_nodes_jit; using cuvs::neighbors::cagra::detail::device::compute_distance_to_random_nodes_jit; -// JIT search_core - setup_workspace/compute_distance via descriptor +// JIT search_core - setup_workspace/compute_distance via function pointers template args.dim; uint32_t smem_ws_size_in_bytes = dataset_desc->smem_ws_size_in_bytes(); - auto* smem_desc = dataset_desc->setup_workspace(smem, queries_ptr, query_id); + // auto* smem_desc = dataset_desc->setup_workspace(smem, queries_ptr, query_id); + auto* smem_desc = + (*setup_workspace_ptr)(dataset_desc, smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in 
b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in index 91081b0931..7a43083f8e 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in @@ -4,16 +4,16 @@ */ #include +#include namespace cuvs::neighbors::cagra::detail { template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); -} // namespace cuvs::neighbors::cagra::detail - -extern "C" { -extern __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* (*const setup_workspace_ptr)( +template<> +__device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* (*setup_workspace_ptr<@data_type@, @index_type@, @distance_type@>)( cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t) = &cuvs::neighbors::cagra::detail::setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; -} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh index 434e75ded2..152d64daa5 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel_launcher_jit.cuh @@ -206,33 +206,6 @@ void select_and_run_jit( dataset_desc.dev_ptr(stream); const auto* 
dev_desc = dev_desc_base; - // Patch descriptor with JIT symbols (setup_workspace_ptr, compute_distance_ptr) - using dev_descriptor_t = - cuvs::neighbors::cagra::detail::dataset_descriptor_base_t; - auto library = launcher->get_library(); - size_t ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); - void* setup_workspace_ptr_addr = nullptr; - RAFT_CUDA_TRY( - cudaLibraryGetGlobal(&setup_workspace_ptr_addr, &ptr_size, library, "setup_workspace_ptr")); - std::uintptr_t dev_desc_setup_impl_addr = - reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), - setup_workspace_ptr_addr, - sizeof(typename dev_descriptor_t::setup_workspace_type*), - cudaMemcpyDeviceToDevice, - stream)); - ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); - void* compute_distance_ptr_addr = nullptr; - RAFT_CUDA_TRY( - cudaLibraryGetGlobal(&compute_distance_ptr_addr, &ptr_size, library, "compute_distance_ptr")); - std::uintptr_t dev_desc_compute_dist_impl_addr = - reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, compute_distance_impl); - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), - compute_distance_ptr_addr, - sizeof(typename dev_descriptor_t::compute_distance_type*), - cudaMemcpyDeviceToDevice, - stream)); - // Note: dataset_desc is passed by const reference, so it stays alive for the duration of this // function The descriptor's state is managed by a shared_ptr internally, so no need to explicitly // keep it alive diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh index dc0882114e..ac4a72b5b3 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel_launcher_jit.cuh @@ -142,33 +142,6 @@ void random_pickup_jit(const 
dataset_descriptor_host& // Get the device descriptor pointer const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); - // Patch descriptor with JIT symbols (setup_workspace_ptr, compute_distance_ptr) - using dev_descriptor_t = - cuvs::neighbors::cagra::detail::dataset_descriptor_base_t; - auto library = launcher->get_library(); - size_t ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); - void* setup_workspace_ptr_addr = nullptr; - RAFT_CUDA_TRY( - cudaLibraryGetGlobal(&setup_workspace_ptr_addr, &ptr_size, library, "setup_workspace_ptr")); - std::uintptr_t dev_desc_setup_impl_addr = - reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), - setup_workspace_ptr_addr, - sizeof(typename dev_descriptor_t::setup_workspace_type*), - cudaMemcpyDeviceToDevice, - cuda_stream)); - ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); - void* compute_distance_ptr_addr = nullptr; - RAFT_CUDA_TRY( - cudaLibraryGetGlobal(&compute_distance_ptr_addr, &ptr_size, library, "compute_distance_ptr")); - std::uintptr_t dev_desc_compute_dist_impl_addr = - reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, compute_distance_impl); - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), - compute_distance_ptr_addr, - sizeof(typename dev_descriptor_t::compute_distance_type*), - cudaMemcpyDeviceToDevice, - cuda_stream)); - // Cast size_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly const uint32_t ldr_u32 = static_cast(ldr); @@ -312,33 +285,6 @@ void compute_distance_to_child_nodes_jit( // Get the device descriptor pointer const auto* dev_desc = dataset_desc.dev_ptr(cuda_stream); - // Patch descriptor with JIT symbols (setup_workspace_ptr, compute_distance_ptr) - using dev_descriptor_t = - 
cuvs::neighbors::cagra::detail::dataset_descriptor_base_t; - auto library = launcher->get_library(); - size_t ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); - void* setup_workspace_ptr_addr = nullptr; - RAFT_CUDA_TRY( - cudaLibraryGetGlobal(&setup_workspace_ptr_addr, &ptr_size, library, "setup_workspace_ptr")); - std::uintptr_t dev_desc_setup_impl_addr = - reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), - setup_workspace_ptr_addr, - sizeof(typename dev_descriptor_t::setup_workspace_type*), - cudaMemcpyDeviceToDevice, - cuda_stream)); - ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); - void* compute_distance_ptr_addr = nullptr; - RAFT_CUDA_TRY( - cudaLibraryGetGlobal(&compute_distance_ptr_addr, &ptr_size, library, "compute_distance_ptr")); - std::uintptr_t dev_desc_compute_dist_impl_addr = - reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, compute_distance_impl); - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), - compute_distance_ptr_addr, - sizeof(typename dev_descriptor_t::compute_distance_type*), - cudaMemcpyDeviceToDevice, - cuda_stream)); - // Dispatch kernel via launcher launcher->dispatch(cuda_stream, grid_size, diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh index 8554b3bfad..4705e35f4f 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel_launcher_jit.cuh @@ -435,34 +435,6 @@ struct alignas(kCacheLineBytes) persistent_runner_jit_t : public persistent_runn // Get the device descriptor pointer - kernel will use the concrete type from template const auto* dev_desc = dataset_desc.get().dev_ptr(stream); - // Patch descriptor with JIT symbols (setup_workspace_ptr, 
compute_distance_ptr) - using dev_descriptor_t = cuvs::neighbors::cagra::detail:: - dataset_descriptor_base_t; - auto library = launcher->get_library(); - size_t ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); - void* setup_workspace_ptr_addr = nullptr; - RAFT_CUDA_TRY( - cudaLibraryGetGlobal(&setup_workspace_ptr_addr, &ptr_size, library, "setup_workspace_ptr")); - std::uintptr_t dev_desc_setup_impl_addr = - reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), - setup_workspace_ptr_addr, - sizeof(typename dev_descriptor_t::setup_workspace_type*), - cudaMemcpyDeviceToDevice, - stream)); - ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); - void* compute_distance_ptr_addr = nullptr; - RAFT_CUDA_TRY( - cudaLibraryGetGlobal(&compute_distance_ptr_addr, &ptr_size, library, "compute_distance_ptr")); - std::uintptr_t dev_desc_compute_dist_impl_addr = - reinterpret_cast(dev_desc) + - offsetof(dev_descriptor_t, compute_distance_impl); - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), - compute_distance_ptr_addr, - sizeof(typename dev_descriptor_t::compute_distance_type*), - cudaMemcpyDeviceToDevice, - stream)); - // Cast size_t/int64_t parameters to match kernel signature exactly // The dispatch mechanism uses void* pointers, so parameter sizes must match exactly const uint32_t graph_degree_u32 = static_cast(graph.extent(1)); @@ -887,34 +859,6 @@ void select_and_run_jit( num_queries, smem_size); - auto library = launcher->get_library(); - using dev_descriptor_t = - cuvs::neighbors::cagra::detail::dataset_descriptor_base_t; - size_t setup_workspace_ptr_size = sizeof(typename dev_descriptor_t::setup_workspace_type*); - void* setup_workspace_ptr_addr = nullptr; // device address of the global "setup_workspace_ptr" - RAFT_CUDA_TRY(cudaLibraryGetGlobal( - &setup_workspace_ptr_addr, 
&setup_workspace_ptr_size, library, "setup_workspace_ptr")); - std::uintptr_t dev_desc_setup_impl_addr = - reinterpret_cast(dev_desc) + offsetof(dev_descriptor_t, setup_workspace_impl); - // One copy each: device global -> descriptor field (all in device memory) - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_setup_impl_addr), - setup_workspace_ptr_addr, - sizeof(typename dev_descriptor_t::setup_workspace_type*), - cudaMemcpyDeviceToDevice, - stream)); - size_t compute_distance_ptr_size = sizeof(typename dev_descriptor_t::compute_distance_type*); - void* compute_distance_ptr_addr = nullptr; - RAFT_CUDA_TRY(cudaLibraryGetGlobal( - &compute_distance_ptr_addr, &compute_distance_ptr_size, library, "compute_distance_ptr")); - std::uintptr_t dev_desc_compute_dist_impl_addr = - reinterpret_cast(dev_desc) + - offsetof(dev_descriptor_t, compute_distance_impl); - RAFT_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(dev_desc_compute_dist_impl_addr), - compute_distance_ptr_addr, - sizeof(typename dev_descriptor_t::compute_distance_type*), - cudaMemcpyDeviceToDevice, - stream)); - // Dispatch kernel via launcher auto kernel_launcher = [&](auto const& kernel) -> void { launcher->dispatch( From 39e67f367ab67992cec7fcae9b6400642221df87 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 13 Mar 2026 17:57:27 +0000 Subject: [PATCH 158/158] call functions directly --- .../compute_distance_kernel.cu.in | 21 +++++++++++++--- .../jit_lto_kernels/device_common_jit.cuh | 21 +++++++--------- .../extern_device_functions.cuh | 11 +++++++-- .../jit_lto_kernels/search_multi_cta_jit.cuh | 2 +- .../jit_lto_kernels/search_multi_jit.cuh | 24 +++++++------------ .../jit_lto_kernels/search_single_cta_jit.cuh | 3 +-- .../setup_workspace_kernel.cu.in | 12 +++++++--- 7 files changed, 55 insertions(+), 39 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in index 
56144d1d7f..18955b85ec 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/compute_distance_kernel.cu.in @@ -8,12 +8,27 @@ namespace cuvs::neighbors::cagra::detail { -using args_t = typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; +using args_t = typename dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t; template __device__ @distance_type@ compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( const args_t, @index_type@); template<> -__device__ @distance_type@ (*compute_distance_ptr<@data_type@, @index_type@, @distance_type@>)(const typename cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>::args_t, @index_type@) = - &cuvs::neighbors::cagra::detail::compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; +__device__ @distance_type@ compute_distance_base<@data_type@, @index_type@, @distance_type@>( + const args_t args, @index_type@ dataset_index, bool valid, uint32_t team_size_bits) +{ + auto per_thread = valid + ? 
compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( + args, dataset_index) + : 0; + return device::team_sum(per_thread, team_size_bits); +} + +template<> +__device__ @distance_type@ compute_distance_per_thread_base<@data_type@, @index_type@, @distance_type@>( + const args_t args, @index_type@ dataset_index) +{ + return compute_distance<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( + args, dataset_index); +} } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh index f074a6d909..e367702c7f 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/device_common_jit.cuh @@ -51,9 +51,9 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( { constexpr unsigned warp_size = 32; - // Use team_size_bitshift_from_smem since smem_desc is in shared memory uint32_t team_size_bits = smem_desc->team_size_bitshift_from_smem(); IndexT dataset_size = smem_desc->size; + const auto args_load = smem_desc->args.load(); const auto max_i = raft::round_up_safe(num_pickup, warp_size >> team_size_bits); @@ -63,7 +63,6 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( IndexT best_index_team_local = raft::upper_bound(); DistanceT best_norm2_team_local = raft::upper_bound(); for (uint32_t j = 0; j < num_distilation; j++) { - // Select a node randomly and compute the distance to it IndexT seed_index = 0; if (valid_i) { uint32_t gid = block_id + (num_blocks * (i + (num_pickup * j))); @@ -74,13 +73,9 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_random_nodes_jit( } } - const auto args_load = smem_desc->args.load(); - const auto team_bits = 
smem_desc->team_size_bitshift_from_smem(); - auto per_thread_distances = - valid_i ? (*cuvs::neighbors::cagra::detail:: - compute_distance_ptr)(args_load, seed_index) - : 0; - const auto norm2 = device::team_sum(per_thread_distances, team_bits); + const auto norm2 = + cuvs::neighbors::cagra::detail::compute_distance_base( + args_load, seed_index, valid_i, team_size_bits); if (valid_i && (norm2 < best_norm2_team_local)) { best_norm2_team_local = norm2; @@ -173,12 +168,12 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes_jit( const bool valid_i = STATIC_RESULT_POSITION ? (j < num_k) : (j < max_result_position); const auto child_id = valid_i ? result_child_indices_ptr[j] : invalid_index; - auto per_thread_distances = + const auto per_thread = (child_id != invalid_index) - ? (*cuvs::neighbors::cagra::detail:: - compute_distance_ptr)(args, child_id) + ? cuvs::neighbors::cagra::detail:: + compute_distance_per_thread_base(args, child_id) : (lead_lane ? raft::upper_bound() : 0); - const DistanceT child_dist = device::team_sum(per_thread_distances, team_size_bits); + const DistanceT child_dist = device::team_sum(per_thread, team_size_bits); __syncwarp(); // Store the distance diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh index 253a9f8ff1..b6562b232b 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/extern_device_functions.cuh @@ -26,7 +26,7 @@ extern __device__ dataset_descriptor_base_t* setup_wor uint32_t query_id); template -extern __device__ dataset_descriptor_base_t* (*setup_workspace_ptr)( +extern __device__ dataset_descriptor_base_t* setup_workspace_base( dataset_descriptor_base_t*, void*, const DataT*, uint32_t); template -extern __device__ DistanceT (*compute_distance_ptr)( +extern __device__ DistanceT compute_distance_base( + const typename 
dataset_descriptor_base_t::args_t args, + IndexT dataset_index, + bool valid, + uint32_t team_size_bits); + +template +extern __device__ DistanceT compute_distance_per_thread_base( const typename dataset_descriptor_base_t::args_t, IndexT); } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh index a84f010449..4163b77d75 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_cta_jit.cuh @@ -98,7 +98,7 @@ __global__ __launch_bounds__(1024, 1) void search_kernel_jit( uint32_t smem_ws_size_in_bytes = dataset_desc->smem_ws_size_in_bytes(); auto* smem_desc = - (*setup_workspace_ptr)(dataset_desc, smem, queries_ptr, query_id); + setup_workspace_base(dataset_desc, smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh index d5c3c16b0c..6678e58055 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_multi_jit.cuh @@ -47,10 +47,11 @@ RAFT_KERNEL random_pickup_kernel_jit( extern __shared__ uint8_t smem[]; auto* smem_desc = - (*setup_workspace_ptr)(dataset_desc, smem, queries_ptr, query_id); + setup_workspace_base(dataset_desc, smem, queries_ptr, query_id); __syncthreads(); - IndexT dataset_size = smem_desc->size; + IndexT dataset_size = smem_desc->size; + const auto args_load = smem_desc->args.load(); INDEX_T best_index_team_local; DISTANCE_T best_norm2_team_local = utils::get_max_value(); @@ -59,15 +60,11 @@ RAFT_KERNEL random_pickup_kernel_jit( if (seed_ptr && (global_team_index < num_seeds)) { seed_index = 
seed_ptr[global_team_index + (num_seeds * query_id)]; } else { - // Chose a seed node randomly seed_index = device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_size; } - const auto args_load = smem_desc->args.load(); - const auto team_bits = smem_desc->team_size_bitshift_from_smem(); - auto per_thread_distances = - (*compute_distance_ptr)(args_load, seed_index); - const auto norm2 = device::team_sum(per_thread_distances, team_bits); + const auto norm2 = + compute_distance_base(args_load, seed_index, true, team_size_bits); if (norm2 < best_norm2_team_local) { best_norm2_team_local = norm2; @@ -125,7 +122,7 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( extern __shared__ uint8_t smem[]; auto* smem_desc = - (*setup_workspace_ptr)(dataset_desc, smem, query_ptr, query_id); + setup_workspace_base(dataset_desc, smem, query_ptr, query_id); __syncthreads(); if (global_team_id >= search_width * graph_degree) { return; } @@ -151,12 +148,9 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel_jit( const auto compute_distance_flag = hashmap::insert( team_size, visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id); - const auto args = smem_desc->args.load(); - auto per_thread_distances = - compute_distance_flag - ? 
(*compute_distance_ptr)(args, static_cast(child_id)) - : 0; - DISTANCE_T norm2 = device::team_sum(per_thread_distances, team_size_bits); + const auto args = smem_desc->args.load(); + DISTANCE_T norm2 = compute_distance_base( + args, static_cast(child_id), compute_distance_flag, team_size_bits); if (compute_distance_flag) { if ((threadIdx.x & (team_size - 1)) == 0) { diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh index eafc5987f8..99eb435dea 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/search_single_cta_jit.cuh @@ -121,9 +121,8 @@ RAFT_DEVICE_INLINE_FUNCTION void search_core( uint32_t dim = dataset_desc->args.dim; uint32_t smem_ws_size_in_bytes = dataset_desc->smem_ws_size_in_bytes(); - // auto* smem_desc = dataset_desc->setup_workspace(smem, queries_ptr, query_id); auto* smem_desc = - (*setup_workspace_ptr)(dataset_desc, smem, queries_ptr, query_id); + setup_workspace_base(dataset_desc, smem, queries_ptr, query_id); auto* __restrict__ result_indices_buffer = reinterpret_cast(smem + smem_ws_size_in_bytes); diff --git a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in index 7a43083f8e..ecd3aed514 100644 --- a/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in +++ b/cpp/src/neighbors/detail/cagra/jit_lto_kernels/setup_workspace_kernel.cu.in @@ -12,8 +12,14 @@ template __device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@d cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t); template<> -__device__ cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* (*setup_workspace_ptr<@data_type@, 
@index_type@, @distance_type@>)( - cuvs::neighbors::cagra::detail::dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>*, void*, const @data_type@*, uint32_t) = - &cuvs::neighbors::cagra::detail::setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>; +__device__ dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* setup_workspace_base<@data_type@, @index_type@, @distance_type@>( + dataset_descriptor_base_t<@data_type@, @index_type@, @distance_type@>* desc, + void* smem, + const @data_type@* queries, + uint32_t query_id) +{ + return setup_workspace<@team_size@, @dataset_block_dim@, @pq_bits@, @pq_len@, @codebook_type@, @data_type@, @index_type@, @distance_type@, @query_type@>( + desc, smem, queries, query_id); +} } // namespace cuvs::neighbors::cagra::detail

5- zPX2%H(K)IOOBX+&H5c2iIQ-c6mNsFnQ@Y6-L6)`ssa($;k6HMaxWA0ia-V*r>f`qO zbnF(t7=72~zRxKZj9(Mz7;xjS|FhO4`?oEwBMOSNC&recZf@tUT8gCPe@jiSyOa8& z_gbc?DvS8f&wWkL=7$F^&2=>o^tB19v5Z$#Qy+V-=CwDd@x*W;&4nr+H`7BQ24rph&Eyy&%*VP-+&nPl9f1T)23`)3YE>|fBg zeBHw8afEU${qm?QxToGL&#!^{=g(q^IJ@o9x?JPF>w_UpxryqjuBW0l=vma>O7Fwd z(Rwm*Ph_A;nX#crirJ=>qUp(s1A#GP0cYPFz+-AxrTuVhqO6Qr&s|FT?!4LIc<}|x z*mn_aavNsutV_ig)mN6GAh(_TTD$ts?fZqUctH1X8M67 zIw8l|1&hz|3(e0?S=3$=$#S@NW85N9swt*-P)MjZTy0-4!^HSR+40uVPwaE^<&`$> z%a_s9$*vDzxUzUHaXsH2`Exy6XqQq>AC6ry_vq9OH{m!*tEA5Ryf(_6uvb4*(%Zn7m#jsU>|0-8Ejy`BubvCvzX)q(S!5aQYkhk9!;ARy$9|;TQ?|lWDH3BG#9@$A#I*K4&u%t~ zo1TC6E(u=ETsQyNt|d2(0I%OD?vf40eKIr{F|o0U&z>p6sdpD5dB)+(hK7c@1239Y zH8nr;e1a1v)BcD05XWlr{??D3n^osZ=myY;%&!wDd1ywJ)c&H`-{wrzeG2}o((B0{ ziFLgsYnGFZaplY6+5=aQj>6yb9p;HsZf*sY2fJ2;yU$ORy?_4^uAnFIYM8<)`pzIQ zD5&rHI>nE$?mY%(A9x=HVPWCWgEo&9#fFbsG^Q7KDlI;lKednZJUY9rqf}G$lGmXP zB}|r$6#ZU*-nqLurGHh#D!QF3BtFpM`TJ>irpU9i^IWoCp{`#Y7?~LTS)Gvv6F@xP znUjs}Ni}Ny^VviYrsF<0j>#Y4Dk@synY`!+xWYz2t`k8^-RPO^WGVaz>|X-NXcypW zj;Po|u1JUOLsct!x)9d8UGy5V6cewG>|WfB&uIvD1hpGpSM=05v}M!JTD(mP%zAOK zl!##dja`dPNj!ev*PN9)5AIE~f5XAv&q)LEs*qPl_;jjThw6wnAT=vh?_#; z4}{!l3`P+lP9E5)bwTd>2+4OMzMj&zmE^N{uWW68>cp!kJg;^UnaX~34syxb4dSW0fL$Vyf*&2SNcN_!vziAufx(XgMFCks)N|!rPN|c@|EY?Q$ZRmiTbi zZLSLnw=qjU@(CztmWEo8hxk1b?)0br7 zA@Rj{z*`u{oCtbs&y~cLvNbRdq+*36z(wbTF(~61k#kA)slslvm-p%BX$A%c0tA}+ z9Me+l2b$F20@B#cXgWNF@d~~>?V&|49cqv$z^o_rDkkz_)`%iDAzLwaNRv8)GZWXu zJUy1IHtv?d)@AAzK~Z2*&!bdAy>$+>qf#WXl=nz}Ww_{%JA9QN zTXHTQa;$Kl?b_-D7SpDJ{d@P;0Wg=0&${uDJCIa<(dFe&9iBskE9b|_$$94;bn<%s zKHPc*XY!rKRZL9AIC5u|b#(`w=)KwU^w!X|aeFTvVmazDxqQc~8R{9)C-RzG9ZuBG zu6)VnHn^u%#oj2mwQ|8-*TN$CC%c-AS!|jYDkf~n2RI6>nIomt<#6& zzF2Ew2MHCjiXgXXyVQ;E4uvz}#;J>EoUB5Wq-3G^_EWKTC+>cCjJ1=mPxYx%{rG0V z{Rf^ONtMOh^5;AzFa4u7)`v_t#r}{0x=N|Rad~;@FP$%tY%Gu-xGkW z!?`dC3D|vC-a@7XyXs!L^kSz8*c&I@3e(e7`~v*_4fuV44-%TXJQuhwbf$gRr?(%( z7&cul+_v_=mttZvcJvK73=U0pt6RjynM`D-2=B@5U*?!Q>y>lhl+1;SdWM}T_pj`H z``;*3i%iwzsKcU66j9OlAtV|8yFw>!HlO%_fGc;^X6U`_n%#uJRJVdSUh% 
zYRrh;2|t}ZKIZ)J>(@ty*y+fk@!k`4R#Ic4s#ZRjrSHqdm*1aDrQ2Pwl+{W15P9qx zswlcsesyHCG`Eg!9M?=wvBHK|%g-Rd^!!hN`DmOpne*ZKbiz0ueq0-S$VBwFyJP++ z=bk+$4{rVh#$zl&zBGyTIfw=Z1U$do4`%32IRH&f{M%FA^Wgeotys0VjuZbIW2jP$UmoZcwmmY3@=l!6@!0~;4=3g^eUnOIm7;pis+i#+qx*CrGl ziMPuy8e3jcu|bt^&t~dthXGb|=G?#Eax6a~ffK+c`6y*sXgebUa~2PQjPOb z;ptYb5*xh(JMI&7d6T|4V=9jdV4LF)+h?CvU(B`ZeGKE)k^1`jTsTbBPL0mx9~sK- zT7^sX9h-nU7MQkxF}XYx=NPig|2KYq7K^XjSN+7Rr^^kG>_!380JN_8tve($t=e_5 zscJpH;0y6?9LSOd)8#}ovgiZ z)AiG6sTPm=_uJNp;#a;|YQFkaNlCaouZY<9MdLp|{mQ`~fNXWI&wjP=*EMs44u!YB z3rkB&V{0;}m`pPOj*i-yobaC+n7kG&;f;_kiIDF=NIZxg(Kc-sNr z!#f}1;%pu>Tl|4F%^oQn6qh}DDc5Ym*TF9meQg70fSic-w@X}c`^dJon(udtuuN+E zVvzRtk88f)2HRk4M1sWZm(}}NCrc#ePsWB{3jII*g})-Zjmy2>L~N#7`~>|ZyE*(P z;@tZ?;;e3brhg67nsk&Ff96VbaHkWcTPTmd2jLn(^e$UmTnuw*u!zH861mk4h4Yzi zzus&WoXU$o72Ag!+JGP;ASmdmzoLj7T#>Iqz67$Er6(15yg;g2gX-<;414S7N(T);baG_?ZLIkr6;29Q0!#> zD`tFuws-o3$}%OdrAfs2hqQR-o;9==K&J_naCgfn@p_|gc>er%1Z2Zzw-ztiqYZLu zvJb#71hNEA&>a~>bHBQBVJxT$WoQHc-aYrt@x06Ce~v$I!mGVPNizfn9i zt_p|!aVT#Sz;c+KKY4b;Kc|o7l$y-$jhAj)Y6+Cr+AP^L2s9nmq30X@yIVxnv2#4% z505Up&k1??Q}AYzqsP9trno0$ae-W!+oiVsOTZ~blaXO;u%`i+_z1#m=K0dKxN>?m z5!zz1HK1tJE6d!EK75S^!*MNZNc;=i*blI-vLqaT-S`vO7L#M_Go!wRV{^KvXqia^oiz zi4C#DK2&)sQ!E>KdFNyNv&2{fDmTqc)4=&(B@($2G>cc5=jissH6&CA(KH?#=1Pqt znh3!X5)$g7q?RGrnY+@n*||T>FB+T6w1~#%H;90PtPsfX-~3P5x%g@nXWijYy5WQ| zWDT9gx7ALcjt9KpI6#f+{Bp_0ITGUXPuKB=#UA#lI5b;*=E??^%s{ zO(J5mv`NJUxnxLXnZwEUb7p^MBT&7m;Kwh^ z;r~b5o5$s}w(r9sNo6QRDvgL#q)4PBDWsA_b0`fmHArbv5sIX+lLpPD6seR-iO7(~ zJB21ROEhZw9oM?cu%B)3?;r2$^K6f8->rMCYhBkl9OrQ!r$9a5X#eJj%N$-_UXGAQ zkVez#>B6+bwc@HpFJ2t94@NoL`dwy4h9AVsa~JU$<(#~aI4-KRbVnKA1U2RWoy|QD zzLrX=Wwn{uJ$*)FKhRzIba1oi>Z}{HKRUe3R<>}}Y}xl4 zdEaUL=T#O>c^;9#R+-C`8?h*-y1M#wgR>y>nMauzu-jd(r7j&XbE3OKgredw&0XPP zg%ow*9vZ5icz*iNaaCJf-hq!%ra_LhM@wr`_jJhKtF<|n7Iq_$#&CpRe&z1b`3yNt zf*z|3Cwc22@qZgSI&=VTMjNV6Cbvl&Q4`D+KH2JeId3?uA&%`;RyJSMd zs8v%b5*03g{``3vG=roMyy8Jui3+r$($NwhkF3V<&6_v=kr2C-8XSU6P?gLvc!W25 
z{p5p@qN&iExU`LLaO#;dLD?MJ;YD){MuttT4xca!uC{NBoFviBmF;Zgfff5@l{m#r zOSH-2Eff4pDsVH6>(*w@Sw?~eyc_&s_~Y?{vY|$5S(}p`;@rDObqB)jimc;@Z#eh9 zk;S+>Tg7z4iafGIbDxA1k065q(4k{rYCA{^(F%qDp2xUM`Ni zb*mgLv=tCAX;hrJcM3@m8Vr#%jTzxVIC@*;0q8%M)5(d?729ZSZT)BktX&*2JZBa- zK7GbRe_wqf!tL6*D`O)gmHf+N)cHtyjXxs{m#^zrmml8gUk(AM(M40$Gh`>Q8PE8M zj_JlEB_)OX@T;O-#6J=Z2g-##pWo@hL4UP7M7@eSh1ndvj!+d^sCtIi{+e(cq|arc z&9P7w0+vG)3W-FBeziW=oxgv$DcdL z;)`q+l)SSS)_j^*cy}`OsH%4j$e`bnSj`9kxnU$1gY8}_CRTyLDXCBzfg>PkI3laf zk8CI_XkTMZ@PZ5LznhbYJ3c9?5<7ej;%w8R*&jcCygwLj?DJc6{BZ4D1KUe*Cu_2i z&t*c0K?>br1`vy-oEcQXrFpLU``@fZ;4=LHmD(~y3NICg+=f!_aL3^u2wPaz=X>F{ zv~_iZh@sdjk-amw|FMKE#Gm=TJb)g>ZsI4v%#4J91wN>DAml{!=bXVqH%@+iHr<6o z>DuscJ7TM`BQ(r9iC%P1vEP^ z5SNJ*qou>Mj`ioRBEZ{&z>tvl&23QF9&U0Si5dAOKU!AeL#?xd4T*ZH@_*p>1XFbz zJ$L^65)tYHGbbUu5*Um1`slGEDt9I34=4HibHIaX<1n_q4)-9ZVGbu}X2^mDM#Ot0@J*p&KJWcHnwsI5 zW-C3Ay>a>)C4aK@`3}&d2*LVWeqXSX%>;KLy2gnHS!r(?(Z(LmDxRSLVPv^J_kskW0P(tl;s`j3Qr6CY6sry{v z<635rtJ+%E;zz9ngMv^FCx<|UCVWx8#oWjZAHU8879R3kaN;!-iQge}*$t*QEtiT_JZCF!AkKU)ScYk|K zu&dz~b=Oktxo(8w+KB(FH#}$d4+sfK0#IXsWT=w-x7QnIwBUVqSv*lu9?#fn5zm zX3|9q;pgXk%9rF+GJZ}Bjzt4+JDrhCzu0Z8OR4F5P|plZI_Gb`(SoV`I5y#}5dM~*V-=IBCHlHqe)VTtY?Dt$)Z^+>`fmXZZBFjmz7JY!9<|D4$oNgH0=KF7yR@a zeIb*rn2(NyatPchUwnCEtqzEL-$8H}SDi0&B#b~>P-e=1Jo{2uRiO2SSr3zet=sIM ziR1g@cj`*~Icvm(%PTA6qobqi+n~kOhD}znBqsP~YHI4~LWq;aL%RPJF68!)7s`9; zw}HBlTmPr-E7LGXxEv>?^h*y&@A1UOp7`{a?*#}MT|Z`Xf0>x>#aRzO?e=t^w*NH` zmnZeRkA%)i>AarV(_3pz-J zQ60#`zlLIjS& z^Q8?^K#|^;JUH=<#1Pur+R`Vs@y~pX);;GCE@~mFu9hh(ZrI=sk+|5oh7KDRE?h`n z$#(hSZso$0h`mT{#1=CN)&?zX;9czYGG!(U_fzBjPx&8UegSTKq2xR{N$+6-odC0` zL~a&yMCRn{eM^L?>(_;pDI%{qaKH-T_Dh#hd}e(e=XAFvoF*?cmEET8Uw&-=Ohc@r zkjEDeE+72+inqW$2=Fjne(1xQgdDdLh?ND=;vF^`>bL z4?Exy2iL7E5am=9+RQa!>`qv?e0cZI-2Ylk^4jjA=g;kclf(FvU;Nu;#Dp>UckAl6 zM?A2l7pYFk9u@LpytY?BN;ZJKZjNM?EFMU}BgLIfk%P8Pta3L(JXF6!Xy{6oXDHXUNaCzP*3~O-YMsoKl5eF+{`3!^sK|xr~L5!pHTgJyRvoSy9~3an8ur?056g4uQx=6 zX^Pycnar`#(N8W~&?6BuEZCnCmduAqT*o`t 
zj5uKh{yIlGto6_jPuh`Qo0O20xrJ0uNDzHhYs17apSlIC;eJ(t9USzNnsvuy5|>od zajtN!i-vFuE73f3OpYFH&zyi)5IXc>F)4tNCmtSZlBexCa3c!COP0)ITXo_j2Ff{y z2sBJJWki9(iyVFW1!0ZJsl6{E6=9Rf~*uEZIhU4?g_2{?fVPxug(-(8pxmfJX}E(VvriN3v)Q4{LaG9(gAf z+~mspm#+zSY>i_u&{?8 zXS*>i^9)0klOt2Is+>0k6>`^!)NJVtyYc9VGrGW2sO#!Xxc%&fiXbs>*rcga(0B)vbK{E|0?AS`3RC4*F6ix@YF z%mDBZP=-`KNy0>Juyx6c*_j!;KV3g8du<^HS6|Vp!l}mNcauERk}~2jK`R%Mz$)x*7xir)vG0E+v;SzzF%>7cL!kCA29fTHYk#O2~GfQt~ zBIjW97)PK1=03kDP}Cr2o&P+n;L(ZmRwXm<8~;MO2N{b8R^yPMK66##qA>6{n_!01 z)0X95Bah4$ME$OOdYnCy9@aLM!KHyQnR+wi>*!yK-+kaY&jN!uBGcY-ZLNJf@np6b zJVGj3D2r?C+I5AAZC)7K$1P!K*nMf3Uod&1yV>ZTOxw7aZ}doTS3lx0fKoUS1utY& z#Df`D6$-Ku(?c1cO9CEW=5zT>$2Z9%9L+=wu#VRTC63Jc)7-+Un-`pjBrg(Cw;JEc zFYceS`wZ1W*BjttNwW6s^GY&=lzq&?Aj$DDv}0=w56ASHTDoh2wSUua`v(#l64^_)Rq?f;MMB znplIBoXI4OSi{BA(%Q(uA`s#x0t>xU-vtq~RwQ;bbiYO;*|)+|u=}pHYb}rcL~3Tu;MovMLmlID#x!$VUr($(H;JZxmGCoo&SksdChP%AsK+ALA?YM9^nV5i_|zY{;$qmpfAT2T!`_ zD$)W#(W3ii%v-MDzf)T~3WbUUFbZ}VQai*=sm@AZgFmSRSfY8F*mbCJ-omZ4p-YyA z+;&E={+4s-BbFh2j7+3U71?VSpEtg+>Dp5bWjESWV_SqYicETE$T!31UONfhvW!k%T zSw8b$h|=CJqNNLxAuNn^$YLsH3ffxNH#qr4Fq0;DM2)l@(I=n=Uzb74vfd$Jrty z%?e@SU}Oz1LD}NAUtT4S89kQpe;T$Ck=pDzb5O7l;h8Qvlew9!GZJ!_*G-r?tVzmZ zO*TNNFOc^DC!AvKLhMuRXq(7W3W^wJVCmYJy9R^&K48Jz5T98x@& zGIg}QH~mnf@_OBjSW)MxaXY^UJ@}xTmsy37L={$r10%AzF5MA)EtzdeR$r^oAWfJG zi*MNODA18Ge+H>_JH)V}JkzZpusI)7$GU9zFwIWcs1Gm0^?O-pdHP|acaSm7{Pt?C zFY8nw^oPE7eHUE}|1+=nd6dIni}z0Y$s4#-H@1d*hlfjp5`cc~nk&}cPgax|di6dIvNLl5XqJXDBGV|tLmM^?ovUvfQXTib-13}6te@pN=QXI=_ z+ZYrWa^d_HQWMO{{RE9ZQjtnD%+^ISY$?Vde0WopXRJXWRhYDUNDNr03cQ==tT8NR z@1*m~`8|+aldxqVO_W>Ji!JEeU7uu&j-~2_n+{gzIA;++OKi^_C6ekGjegg`=~Oq0 z0Bq+U0DZQW4C?(AOay5FHNHGKPDOzf(VB(y2y`sSBPsiW>~*}{YHNY&Mz|b8Y61|H ztLIxne7r4$?%M#kS0ZalY$=OL|DN;Lpxy(A6g*Ef#uW8*)X>UOsG!n&GL|3guZ+iZ zQ=EY)0j>dAD2bcrntG;anK{X>59);stozlOr5*N{b&!pml~lw$l+$)GQ1so<=M@&E!r7R!=pTuA{T)=S)fV)@vBZUi!L;e> z#*^0$4wUbO0bpnZCq&k$fw49kW@gdEC=pm)T3T8b*bIT?s)yaF8#y8jf;@7(>N}?A 
zf&HN3-RUT+vKKy^giSvegsgrp*inxTm8d10J|UY_DkVJkW<5t+?pYEkyDbQr~Y%KSg~%|1L%Ybp`(s_<`nY z`z~n0vlBW5$$_WUI-wvZh3X4onN2_7y+0qOUS6F+=)_n=oYPUXoq?pVKdOGY*nT^A z?u`YWJ4a4w*otEpD_u*J`_?e}!pcobMnjp+LqU98o;Q3XFbeXl(;=blUG=>Uy zJl4Pwg;^Z3iuR+MdF=wod?3C9r}uB<%A;}=5=|mDB4IL4n|>gBLsy>xac7CT3stdN z$0O+6B!!bl0I#98iOQDaYkKi9E;6zpW{i~1E^8SWyxWjV zFP5f=RecrcMfDXf-au?Yy4lP5A3uEPUoIrf{do+>L70ya%~+s(96+ICq~7X?VTZr> zlmULZdG6dfvx`f_#VI@&ut|09S%D>gy9FcIqboOERQ-=f~J*jLD-suqQY#zn<5$}1`?a9DNkn)xC^!lM{sc~W4`;(WP8lA-IU{oVhH$*M zvwMT&FVi!Im&XiMVreLo4}VxJe^hRou*i0X>-z*|kHdI*PcLn7ILoGdkxg(1#e#Hf zS=iH3wwXjrBVW$QXN1rxE~;Y9juYLif5g^5!~3gGjk}-n-_%5jY!+swl5&HAI+fGA zMb4e2qe9Res!9d(iITnL*J}W+!7m^P&)PpCPWL?@nP3XRvoU~1#SwcG69r3%?EofX zRDYyDtZ;{lLga4GvtW@p__8S@N)+9nY=@9hiRqn?$Fq;1rj}ujX#&hBO^fnM{4KyL zhe%LNFfkO@S}=2}Jp9apc!>U8)lxiV%)aOt5F-jeV@7xdV*CcCcY+>gP1~zdUb-Ac zI`kcWrU$j^IM7q%2j3K*45p)Y=I4;c0l_cFaTeR1>?SyI-6KVFTZEEhcLnfh5|^j2 zWb;g}ZRBCT*JafJr9WvtG zzx@8C4Jd(8*^TkuAW73LcWbwT`+;E+Bsdfm6@6Z~hYss}Le7lo!WwW^H=ZxycmF>i zPsq8BjN>uxVbAC0VitL-ao$A2-V={;XWYOgV4uu<`7J^6W^!EhK7N)!o93@>OXdSH z|6)6KT9rNk1jp=>On`CGR!R4j32%-O5#46&>A*l3fYP!#1z?Sy@>*2hC|UQjeNsy8M9%Q2 z3$tCne+wPG6*|%#10_M~H3=6|=D!u8p%yA~D++lc;3fhL*u3Ze7erN(dgqSh4ck&x z!VN^xO1K5Xw92u2pboq49R?$k8$uo&i@l5K5?cTPX9Cx;1FX|bDt?3Xf{)(^7%W1S zSt=)IfY}NqpnkO;TdKkk!2TBb0SUE!!aI2v*{&qj3fq0LRap!d)bC8=gBcBj4~mMo zAiD+5I*WXxD~OVW8%YT1gfqEAQ}YsG9jWe+;n{^h14fG>HDx$GyI*gETnFqg4P}KF zChSjshvo=m)&DDzX;_&{pzCFboE`~W$Qi}pk*)r(}!s{+^w`zkgz zRt5I=7W(a*EUT%dH@=PC*;$!*9eP3r+b75{oL~NFXx#*#4;$#1UVobC+6icI;T~Eh zbWk@tuk7fsGSlTpz?ZS+i=_kDTMaM{-9M5KHg3wg#rO_TOi2nXVMBL@FA=L`U|?Q1 zU|lP)(127)5saja`nG=D%WX}^JT^UkV@vXo&Zrptw(NmpQa#H0nC!C#+kz?%)EsTE%J!pjVgy3q`&CYCS;-|&X(Ofpz9(ca{ zCr`K;ykSRw*m&&o3;K1r23sxx^TAd%bRJEh^P}Ek*%|crjydizG|x+8yyCR{zS>`s zkH?>06s=-lYyXpz2Mp>b0}IDtvJ{J#;xwb{t>8TSS_nKFk7=-h#>^@) zu5fSEqA+y5q$w)iHq4&Q#za_iyhjVzN$S@n$2-a04Ho03O9zNY%_#N9n7s^&BCZLq zVwcG5Qjjl_TnE!#%Rp5LxN+m%2OA7$kH^Ry{jFD)X@aXks&)X~32c@9GgVL}E5p34FajZ?C4fdw79=iL 
zIle~|jr7q5rF#}-${Wv22g@i+3S-NMVbaQhN`;v`Iz1rin!11#lb9z_r47_2ewLId zqs!26{kE4UX;26=stE`RiYE~nrtVbY>)(9ohheZop`(qsUGjBs-={mXyPj|pN(_Lu z+n__og1+Y;i9a-hAyENljcsdO49M!m>7kQK>d68q{)ST0gA4j*Kgs%iU@v%1v7dpn z${XFu1Bl&SFiTx(sgTMJ_`hxpB2@J^L;wTg(CU+i(PQKviRnTKgwO<6TB^4CT_h%1 zSbqKRZ2gM`iFxA%x_MlZ%(qdR^BaqV#<5l-)~O^KQ&l}1Dq%q;J@`k03Qqb${8Me3 zwr8UWz&{c$qT8*hTz(bC_J*3BH7Cfe9qDzUQz_uq&bn2<*ah4dUmJyL=S0qBR8 zdr8vbnXHSp2aBkZf)Hx_;;JOp6qeotN++0VU>$*4wr$RwIYrN&S#;R}cB6f0L>7aY zQ`zuw^Ivv5R~+4Zx^0P+sK&@}$Eu>O&!sw&&Qu62HrgCqq9@eJm-x2bzI1(I>aNJ! zn)AIoa%-pGe&u0Xy)$yHE35*peC|EC{w#mx-FfG^ zZq04mGc;^8x_Z&d-4~aw;(Wd**#5vlvt+|t2Q=5FIjst~O2~EG=RYpnQaZ`ib=fD$ z-4a}P%!NF98U)TGX~1DkiebM8zq*s)R0O@hfilMy)p{vHS!Hk51WoGx<%3X?|y*Xunv~RwP z6)pT*$mP2oYaL_{4SeEI8+deBd)L<+_h4{U7VOB>ESH6Gfks#-!w zve@W)pciHWHo#_=3Jg4<^PYZKd;4?l)e-OwkYR-5XSu$&jV=0wYKEG&E+TZoJw=DZ zI$l@TUs0{y&dw6EueKjU)zr(TtXDs+_iLwwp40P{jpb>2i@oGd-Y_>3svLE>J>oh; zl=kFf+~IzUM^Q%)JIj2hiHJPyu|3o!e&ud|jm^H59zA>I&M)2Gb5KNLeBqbHRdHPA zx^3+hKU{kl=w5Gp4r38#x;XTABU3(Vt)iP?)6D#H^vm ziAueY{SfLl@tv6<`VrD2O^a^v)xKoY`8r6*yqnkHA>pSG5o5pE0Zk+1F_g+nkg6dY z{6W}Ut88i-CCv3MX+rPO+wGP0hHw5<8Ev1-!Lb3oShg4w*$mA1HKKtYRJvLNbqq@g zjTC{?Fd&g#aEEpTr#i(d{+d7{0KsTw^u2OSLq(=N`H^kWl9>MCJH7|e?L~|s47xO? z*vg^aJB6q0?g4ex(oDOyAf5=c?7&}WgTf#pT);A< zA0tW$^E=*WqrIW$3Ryvto&x5?IH@xMXnR1;rcU?-Ujc4K?Jb2qvPJO?MuBsM=A7MCDIAN4~Oqo$h zDj(pPc&sObcKjm|jTqIQ{HFG(5d+9xlWSmbC}3N=T}{($5EP?bjAXt*wv=}W{4X5& zpyIyGfp@Mc*X{(P;i4YfvG2~Qy^|LRB(ep=6t4)Q=+%r;;i0b?a7ZdJ0{)Rm zK(x}|*BYE_kG{1s0CDJKFd`9ZXjB1NCPS(RWB1aPF~iFmBr;`{lMsY(;8eq z9r4WXnCm}5jMTF+6s&@$Lqn!Zj=m7RG3H$BeN4_RM*&Li#*Dplajh}nmRtl;85Q>K zQLajL^oVKv?mJ#`%poPm6i>u!q@X@l01ssi@4DcnQB_CE{w6x=g}t-Jda%7`%;s>* zMnp&o_;u);WqkMxNEX0;&TQSMmVTEXp_qr_ME)Y?OUD0OaQ<5$tB5AFi$J;lxiEY! 
z7?rRH6mK%d`U|{CE0^>itF)JC1ONn{PE&I^3>bjyjgukl)Y^G7;z!X0@P`8LWj*9f zGCYUAI$>Q2gEMy)J-A>GhkOLm(|@SoqK->{EnU8f6aAAL`O+ZNPb zjzmiTq;ib0FaZ~L_#-SNY%$h3bGg=Obhz*Oa0qCx)Sc6Cac+ygt&yKtGPD5Qbk zMFX}NYw9QY0bG=xv<~&ud%3WLsLZ5*P3X4)1X_Y*jTskGW~fa1YO61mn31s?y1^Gw z^6Ume*G$+Jq)8NM3*oPmQZI6U5)8@>w%-j}dpwEsP9oOd3R9?=`xztQY@aQ>+6+g! z6R8*`n?~k&Od4z{Osb8SeOVU zx!-U5GpKgxDH5mS$zyCE!k6i3{@7AwUQNn~t*w@F{dFGmCHJeGFAPPn>IfbRZ~$>R zRMa$c4_R8qA9~QGObT_#1WO22wH5Ie?T+blThKhsF@$=)oMUR216ewfBE$ESV2qR) zy5W5&);ocFAp`LD`#VHY0e?k(u$^o)mFKNBO^a!aumG490?5P;-4)#OO{cjJk@^)9 zx*M@Z$8q8aA>0;=vC8oQ>i7hf7FH#7IQTI=9;@Fq07MrqjH-7+B&&mol-=!jiXr=K(wLUiw;+~d6ihmg_en?o&`F) zc12?lkJ8WsY)4|`()QFZT&;1Y_OK$;T)Rhu>(_D)N_Ts4ysT z32=}=0W{@*s*VK{jy&p{o4bR-RRRs}4G$)+-Uh>BGpzIcm1aINiEU2PKvB#IIMR_?T>_k=s%#TFY!9P zZDoZw{P+BE|BdP$Tq+iEoUHZCswiN|b~L+!$_ZOX(4}d7>-7ozDwOk8aX66|O#ZS( zxkjT7m^z@hdJo-xYP{}YWY8I}o&8TK&r=lDX2NamA$`<1m6Z^qkI3BiaZOWjgz@rN zm_K#2zf5`X1Yq-Q7B5>I{u9!6^lLN0HZL3xW`N;RY>6Mgi;@~%!#UAzbls412x|RR z_%{33P1}N`RAK%r&|Gm{`Qq`G(oLk052IlREIJc`ZWzJ(7+9Ddh7e_tnF()&IUl7SaEyp|E*p>g{P0i z${2@Sj=VIMxZ#Y(3uWjI?13eX2O5PwX2pais_vq&>?WeJd;$yQQ{g#P+@N@PU~hY) ze+Ri_W1~l2*hok1h#Ce0Tm!7d*VA?-Fi?8P$$3Ti{egZE~_-LH0Q;|y(k{;pZfPB=3 ze`p}j+(TUkaNeSLn%L5%1f-8ChBh*d023hB3CFLYVwRZ~5oay=K{ERh8jex90OI>L z_mfsCRJ1CPtws@m5=S`e##wrtCbWSzi{W77I5hY3=1^se*v!Nphtb~6$n!LSJ@$Y* zmNDLkbrIpsd}Iz(s`Hcmj$$LYIM>#Jc-0fZH>?6bV<$&RlrZ=&!2Sb^_MZVS4upib6iBARzl;TyU4LP@`vh^L6{Hq z78z#NhzK(b{$7(J^ohSSTc}UFRbf|EP5GUC0+aanLJ$2R8zy-%8cT30Qao}}Ng!Dm z-6kK=R2B*9x)#$o2>C1O(gibErn-p;DV-XpTBC^tmJhsKj|54@bt5@j#|gnivrcZ@ z4x_#Sqd3NIMjhuVKXo;QA(BEq8A~DQ*Fi`9k0J*BLnnzv8)pjt=|e;0L;s&7qdWA~ z8NST=y@-G=2~tY<05OYsmXGX$a;@Xc0$d`s1Fxs9e4w}Hv30pzcJ}Ex!LeG^z1kuV z={+ziN;s}kenAl~dV{p$U>+4}ho3@*r7vE`QZ<>*KJ+3(I_UlP!^rL)? 
zA)i5|$^GutYdEMH$v-vRFc-w&u3Yx*R<5F5*PGjtE;<^Gzli#ax`!cU>~LR%rqm86 z4D7M&REf|67>8CS_RD3@F+p!7e%8XV%V|L|6%W>td;g zUfYLfGG*s$b?gj$7w@AU>#gch^onl%Q-2I|MoN<9-NlVBYSVTWy4s()6ZE*I@QV+UQ@iEJ?S+vo^|dJ=j3cX4}Bwqo?OT>5rH4N`2cR z(Tm4BONDW5Yy3Li!%pwSt#4OHNoy|@TX3C&xnqeqPvnEw8i!P#?XvVM+e;qMn0vIC z6;|fo667#$+O2L|eSY@+Ol3E+?bR}NJkZ|lolmi!+csZ-Zap?i_P5gNu)W|9avVw}-I zeoZz}8O_6o49Z)NjvRTK+vdH>;9gX;qMl`%)wj=bu7}=a8(gB6mxyg{nJrbgTL}Irik)(w~ms1OW=M|C|&@oytXiChR|G zoPHwPEoyWO%D&zUcidXO&vxr`+MW(3k=e|x8uy0orza%ono0*w3E)g?f1eZcI=MI$ z{~E7tP;RqQ+c0b6$9rp2cZeA|3&{B$dpKuqZ?2b;bBch(!mUg6oyjIs;DMq&ab{#x+*aKz&Wnkx zw73WMg)UKE9o41#|1!z4??2Mx6@SEc)vl`+`|S3dFJzjnVYnr_TKZ^|SFhSz#hYr& zeG?=gqUMFe_UW)*JXaK-h4#lq9c-PuwjbrTQ$Kv$Wb)$HfHY^J@6MAM{DExdtEJj8$@lJW-LcywuwrNYt>_)1sd0}RS5Ax@)OW5Q#9nfpe0nH==VR3V zl;nvFDvtN+Wz|Go)XUD*uKjXOUD{MP&Bo*X+e{U_G&|G6yv+DUcWz;IDqQ?UOT(@V=HTo_O81I)jbC13-N5ynWc)up&j!L&I)Yq(7GGQ zUF6f2tH+?wkT8wbvKmG|UGPDE*|-PjI&(oKW3m#%`z@bDlaciH?j2*1ATcDR_A$qf zqjZz&;tAOtBj_8*E4qgzeU?(vvE{inBTcO0IkuafX^OpPLg3|~xX&#EnUQr$H-g>P zx^3Hkx-^h$i%ws6uh;^;SxMR_US61VJN5OK1C4t(+GRZXK6JcN+se6zGjVmscSVf} z_pIvAu?{SGGcDcE`05gdhljBr&S&^&biPbHFO}UdXYkI#yYTKrKN0_7mSs@4(p2_l zB$SumZ&t_D(_zT@D%-@aC9|0=k9^jgAM6wWRlJ3&sDxZNFvn=e#PH7BIj>J4Z|Aks zo|4^=pOfw{a{HZ$^N1abm5rL$D%wt!>Z*n=7@ChUW(kSaA`;i)UIh*e25=smKZm>e zbhYq{S1TCYxO=j$=~5fSQoi$&qNuSq=$g_jdko9{~6t9W-xwA{Nuw zn7n7+@9K4}6XV{-q-un}9MnH*@_fDN>6g-Ts(F*{jjY^RsU3GGzIAYl{VLak=T2E( z)pfP=>cj$S7*}<2a#(EGerBSAWCAPALg(G*?9rW8I`0lSCtJ5{I4ohh7t88vFdRJH zs%+DS$a&)TpGz|_bIwRe81upO9yX4$v5n*@3c7i z=z6X!VG)8niw1nya% zFFXyJ`6%N1(x zd5~D*^K$ggsA{i}QWM+#|kCj~8?*WXn;;aZ&5 zZabE(^_g-rJm0-qEN=a^?_=v$o=da&fj%|r$SAx!EPd?qTG$Bp{Pu2GA_EcKD!}0s) zj$h?AbPiWr8HEj6pfT!Tzue=755u~Waxvx{qi*^BIZSwk_jdEgj^4d6$AUlF*+}q` zeZmzE)l(<#cMGvhRao~&Li7aw6A-Gpr}k`TO~AIEqGl1*rHLJjRF{f~$kg?>W(LVE z^jZW9ej+qsMLC~Vf^3`o#!VgDwaY)3$HzBtUR0ar84&7!w=a!-P~Sp8I_BtK0t@vz zcH9k|&%7s8o}shC)2dJJt-Q*Hu(Xe&Y&;Hj6SC6zRX8V{xNCoX&NiA`yJ0+Q=bm{< 
zth6P!k}V&Zc+J^$qqTS7eViS!1aTP&EWPaaK9s~sERtF(=?Mm-!J|H$!gA>V?$#WM9qp@?8~0ro(U!8=re)MApL@D#Q_srw z)oH3F`#P)CM%hXNRV-UAcg{sS7@L%AwEf|;kzJ#&0#uC6Hs2Ud@OXo~n+U@Hs;IDB z5-old--v-hf^pJ@kJ0;=tX{bFO#H{uRiEP%cI{F7I3dc*`F4iJ=z%?IQ4%^vu#67@=GWzOm~teTuNj(zITlOS&8Nv*53ww%(qJ*4RqdKG`fy_2NR<007REu&X{8gF z{IOY8px*z~EV*lH-L0KYeEyL#LkHDoqaNfRl#^dJ(kphUUGIT+q|(=6Z5ds0 z7bRW6fsu1OJ>TMU1q;{TuGn@$*q=i+LiB8^#)7k{N_SlEM}HLcD=<4Nyz*N82fJ)i zQ~qOll!0L`LX1}bE79VospaSzu9zE?0k;=*`i5OaWs}jIq>?AClGeAxu>;*E7HxfY zuosRvZjEKGdWyixfJ4qBVsEr{5^Ix&u9q~_pKcX%G=I0vInCK?fy{@cri^7LubJ$v z;Z2e=-(Y;ey2nLl>hkP-hIIZfI~@5xx`|Fyvhi{!o5~Z=|+QQqH=(4@@uwHd5ts^9)Gq3#}39|vj(q$|D zmle0yVEN3QSpjnC8h)}UkzA;bN91Py=^X2It2C?U?>Qb-25qU>^WJOjuHA_5VZX}H z8C=(0w_fO4rIJPcpq^^GllJKc0n3UHU)A7FDm%PuMD6gVjLQRvJ-TZGUCa76ZWeAA z-RZYd;B{y<=B+T6=>LSqqZRFMI;-%dU zeYSQn0-AEU(yfO#yzZ`;*qfEFVDMaLo_VH1@^z<6={q9R`In2$%*dELLGU!i;2}q&h-aqtDFu3j4bnWp%60El_ZIOOfQ!DErB9Umt>wW9f z4u^+VpPAf)gQBaGy{9dHwVso2PX#(8>z)0}HRjcEGA5S26=R-mpXI(gWAcWUS36X9 zY_QY=ng8T-=4gwda3K=94kz|!dwe$ke1#7G1oBSNQf4@!LB&%XGyl{uJCngJI*L(o zsfx8#>OAi0XYNHLwYrIlmDeineeRO(2g_FfNPT8@X%Gud>&vuseg?t0ER1$|&j?73 z5edc_io+F%p1&ugpBP-e%}%|Twh*Zx&Re5Pp(KOrk;$BJPS6hfXZ;Xn2DjC}gsZZt z4nv=_b&UFRQeXFTa_IFgS$Qxi?)B#*i8JIkwRe5zWK=USanqa0P{KO-6bU$(nifD9 zGFzuyMC8aiQHHLiZ*G(Dn1CGrftxV6Df!K3j@xLdXSe*p_x#>WgJ~Z<#RlJr@2xG= z(Fqi}mE1Fs{)v;(I%wJ4ej8Rmm@{kq2S`#tP~C#*oF@L(N4D$O`h zJ~@+Ww3y7m{{}I|N6%`J#pTLgV#rEkwk{|INT)j-^se-)-SK*L90Vw}3v&Iq=YN$Q zVPN3VN1hWho8!BgeOXwV^EwJA-zHqyvq1L?O8w1wQ~1ii)IX{~2M#JV5ovAZaawly zQx}V@U8@HX5Tec4jnDKi-=}m+@|4puPeriDx6(C@T*N&EN#o;>Vm^fR>xf%IJHWx-wSIq=mS%`oG9v!A!{1r(FE)hA>_M5!)V zRtC3Rl61DqWM~tEnyly^DZ3C)U8lcexYrK-JC3&<%%`;vPB zpOm>D5vL=nZ;%&DTV<5-2*o_8i3!DxHIx1(hvFa`%?d$&1{HA~d!Ab>zq-t;uql4g zR@jC##tI*(D`2`XgU=)gg<4?{i{11v(c#ZkhfbP< zwJbMyrZ#@#p|lG!L)*|ZKjGsiGu5?4>#n~Vxy?Rf;ER@^S&2PFO@-L+Gz)t4+}^;+ zy)|;#_dp24dJpuKZn)DdiNDp<&8)d=^?mjTfXxC`r42i2TNqP z^C^CLj$M9_tE+Xf&1hvpp3) z!NKX3Mw8CIya+MdC!1VHFM)^nQkyB;Wm*9zr8(Pm 
z4%OSaTt2kg}RyZPY&A)T7gY_T~y;Nr1lTnxRT6*guZ5w6Z**sF`7T0q^nHj9? zsH4{%d{3RyS2tSxe&G(z;UeQYmTu-WiGP2UQ%r<`tD-BXMbvRsH9^%W`Z zqQX)U2Z}@dBT-$K9vFs39XZPATaECW88ftg0#7kPTlB^93xrV3`K5`cz6fL!DUgTN z3wc)TRl0n!>C=;uidEEQy_xW$K|k0Gz7|yXNb=CL|&f1APbWQNSh40-H2}Xe<{Wed|%1$__x&d2X|BHCKnp@u1FXq zfj>ngOs?u5JBvP&x-nU{-IrlVA;#(4U9x$~x}45^Xe0E^p%@YW!XEw}TtDqkwKBmH zOpW37%^CS9;TlKg77&q-u6f|0TYc?9vl%v*AsBkr6+G6W5)#MdCKs^(KHY<&85KMq zi~?~hTtlR_X|c9|31sMq+UeoqjRz?%1nB|#g=jGQIoE?vkMD)NdTjORg0P1Q+y7Cq zBq@fjq0!_Y@8|Y`IHZ3RvdNDkVnii0()y0}&c#r%&>{Liq|K;NG5=R3>BQ@|)c=b~ zq`KkarwIi-f1)w)@2x=3R}6RzWWs_jq|Vz~sEu5=27#gq;(oLb(YY0*>jYyBo`&h| zpP8}@(mctJre7Aeew$I!b&&dzFN83`13oJ05c;1+75;_|d-nV#yaX-5UG&CW>hpi( zuPUwE{kxRM829~)K71GhhDl52OG_m6*oqr++Y6)RT4>kGYX`?}kC5t#fLfz$ORTb0){wu`Xbnpb|MAC=eW}LpAkdUxYm1xU6+DDYZNhhf)M$N<#WfI8TteW3M z2p$NcVn9s=5lsPVKf2H{C2FY^<>gd68mwjWBXz3M>oyA74AFFfycU1u%6Wr2q82sr zO_w)RJ#?Kit{UfZg?&|`0d!To<#4dR<48?+I9Xc=nv*V16QN$A`x9!t!&|>K73)B2 zh>Z8)8*biAk#5m&0{^MrM5z`l+@co||3i}{<-piU>FT2M^kwV1U?VLscjmnsMPYK2 z8Q1RpuT}?Y11Sii^#4m`?VAld(=(4T%HiK5MDi1_C4nQ(`knna7T~!AvY$KsuA$|c zIOeTUzr3x%(ir}or9mmhDMIdTOky^vcAU?E(Gv{q9{-2Xi(I=4lyEX!ilPn^%l^-7 zL{dG(@WmLSqlf^qDY4!w_&CQKQDOH&`> zA0ch0`~K9TI`*OUQ5hi&bu44o%Fy*3FuornQO^qRsW0Ja_^m-#-RL>Y*}x^Cz?aNiFXGhvonzeX|J}14Y0e@~E4~bVezb z0t3~EP=EAKVrJCJF_4aiKXbz97pfxzss082psqybtq7zr&w@cG3Z+n70z;sk--BWZ z(7)?AK7tX4^HDQKOBrdKBytW4Oaf=wWR=E21iYF|8jfizEIs=^iB%; zbw1Mz;IHXIqNEFuvd2FSo_Zl=w-_kO%73KBAr~l*#Ng-s)4aG5aWT^LIz}iSAbmCV zfiZM(DF$IdtOsTbz6Apbq0*SF)R=nEkNzq8M&}@tYe265cNRGPc7I&=4>|!KS{#xf z!id2Z)H*0UN3B)O_+J|PWAeO*h2%Xps)Avh$@ z7$Z@Q8I$p(ZhR=n;Ms8!6=vxC;38ATf88p$oN&NZyL++ca6vK)*hT*sXtPwsOwOt_s=ZRpWlVatYbae zzr<%05p>MdlAFf3)qmCzrr-2KSKLtYA9Y)(FJn0LuV^Cmna_lpX!0kCUwSy7L0V$| zs8vV5=eP8kN8xsKD}a;zqcLR&^pYobAs6TknmM&wk@07r9o~M^O`qlVMF-Uj z>9_mix`+;mx%&B84@$HCSy`9e9D3Aa_*LF_zLR)u9 z`noC{B;Fb`FMqk}51`~1$qo7n@8tuhgm0{YV2nOA&MEo#uE4*!>rH6lc(O0#Lg(D^ z{x^A;rJFu8r;ax!{*?0quGQ5)C_dh6D1lAaJ)Q&7DVoH5z#pR^H*Si$&ipH7pT0G_ 
z&f-Iu+xVvfJ@u(Blpp@33`~8dfHda)nfy+DMuBqlf8wP7;(IOrmXIp{kQ1e@L$?$^ zvMah8>`xZr&+q!T0ucEWb<>#eb`9i0I<0doS%GE}oFjE1dh`tc%mwF57lDO;Zlm-S zpi(_{>`2I4@CUKYv5$~~)E`BZl&mnmWHI4glMzBh;uDPm?h5>^U4YkYYpm0WL?3}3 zFradLhU_9rxkth0-&u>^l|(Pv>{W2fv)w2l&Ih`gGu*4ki+&F>G>lWi@D?U{wL;Ug z?9^md^mBJUv!d>J=GKh=pnRjLZ*MWw@@%1s)ka#^0VrcM%7UKV|A%fb2N@e`efJ`u zrJCq|_M>MgJs1bF1KB@w*}2E;6YM3b?eo{#-y#zDP=#63dX!B0gK#7AEj2Q9cE^sK z+}$7hyBaH?5Jp<*po-eNmoBkWFqKid*}Wl4DMS%5N#Fz3v#_x zi*jAjhSmyrzZ`Aq*(dT0vR+(NQ@=XVy*ZSRMKvzYmoFhP(7yA~36~z%=qS~^;oj#v zwN>kr-|(EuliSyxkhDEMX_Zq>YSRH`ule%346+aK%yNGiGAhf>X8;9vVq3pPqX&3SKHs5qrmU`U1@@8D|RmYRc z?U&5)+Rx$NPv)-q!qt`i_m_)#wTgptMFk&&sT^>*q6^?Ql_M*x0$q-7&1f0!}~N6<&i=DG|=78RMDpP}|_ znTX7nV@Z4VOflS%y2iR@%jC(vd{(sWlGip4uUlD@E`P+Q{h-C+OA(vatSNdkI#83& z{W9D{yrY^|=1efpw-tBp24;_b)7l|;eWT%fcblg#eGmE!he~YfKKb#}0+}-p9z6Z@ zI%hbfsa&>97eC=RvaZpfYZcKHWG@p>r#yx}buC%1eQD^0E zA;-G8N|v7ySuZp{+o<2C-^GV<*hj;{V!usQ=gOO5VQ$6V&U{->6?-cS4*8h3=5EHK ze{-DLj~_b3=o*^UYxprd%!OyYP|nr5P5@aeJ9+iAC^@c zda6b)%iQfLaNc*}Ud~}%=3(Oj1@*%2AXQESanI>7u{0H6_UBL(p|Y7US636?n0-zi z#(R9c(8{rYMaamg*{$c0QirgQpq#HH9298-BT7i54UFf@Fp7wlRmmn~y0;as&2@)uRDJLiM)mlMy2>-$vlo#gqx z3zf*bK(5lVpNIOTRG&Cjfz#4 ziRFY9`VK4f;($`FyVI+W%1(z+1^}OS^WSFz>@^XXeYWry;Z<*kQkzaWbBuGNedufB zlUTN_`_ts>_xoZG7tipTp3@fRGUSo{YGmptpNcTLvoP5c$ zgC|J#1Xo<;i1TU1&;M&xw@IN%dEn$Fm{Of(4`Lql`Pl-fnnV8|>bQwfRsj*Ir*?Y;KO0^Yd#*(V|!e zF(|NmIM3xBGkvVrPxJqbKf^r#=K0&PtzdsG(K*3ka-v3j?ajO5jDMV=JhEw8c(Qk^ z_pMn{#V}5qQj?e_tO8#I*K{QoM|px=ni7d^`!C%HjPK^|d!r0?dB`tE;>ei2FU(91 zK}e?_D%|3D>#ZP%izhyknV8DvC+b5_!m$uBw9nnP8w325ybce$H*Z& z$BUE1Q`*9)yQb$0$gf0fb?T=1jp<7#vk-5W)f0ny#Lkk~4w{KllHr>tuDrWFOhbg9PIwyd;j}-BD||=Ulznv`~zr^$X>pLRKVW-jUmJp z@#`2Gm{?aSaE`lN?~!+7$HBWq!%XE|ZBK7O!&axoZ?89;-x_SHTiyHGrquZ1i&s&d zss}!P8p$ofwA#UIH*T0LJe5)y5W^l~sv&+qGf&@YedMJh(bt!aZ~hh$s!+LgN)Kzh zQs5UfZF3STqFfy}ObM4Msnj=)7FhfxT$X9n=&&U4%ZYiBHoLNIG)X2~Z~A<7hpef-uXAZ*eGKz0sGP1d zMXUT|%d_h06PL{|zGh=)#LtiXsunNn$=!7?QJ40K-NT_>6|KM}>vDV@(wvgJgbd7V 
z^7>vGNDQ5*uzX}!)cI6q&TVOkvh9T^o6weFu{pQpUfxR6J^XpE(bJQQA|J(<62q<^ z0}&5!o;ejQ;8!np*FrH5M-~08Zmeuqx?QFq8fj$4Qs_!fuUhvEJ{GNQ=l?RF>$7OxdXPUCJC!$!(fBl|z z2K*fLu2Y+#U6+BLC2^zBYW=2IIEaxJoM0aI{_U5a_+U9Z+ss%+iRA0* z@t(PGsnC%;GM3D(Ic)su#q?&Ldb!jUV}qsHP#B#2K6?KZv`lV8k_F!9y$K#qYl$@n z=9^yqwEUZXK{pPT4E|`mH;|T=nakDsj8$Fg`D|lm=AbmooaBY9DN2rf^Ms@MqQZ)9 zxUwj{6_);xYpoz_xSD@`i49M7yf9AyUx39to?bIrcCaJQX@)MJqxt80)%E6x&8hxu z6`8wZ;MS=3tYWkH8sZSD_82;+DZ}$m#P@?w%*=I%s|xZbkd?XJ3B97WMr&K-@KrwrzgoQ!@Q^Y zo*EvdW#4Ovxf|M+V-Vs-;O=mdD_1vNl$k$&2Br~9_;*@ z&b7AMz3*LLTSWx&x>js=a9HZ_fc4zDbJ|u`qG!&W8AH$-+d3~r&AqjRp%D70OI1_? z{rwr>uJ>eM5j{ODjCHuLQ79xNq^7=p4MwSQTv}FphOXh#K6Gg1GF4T3I0*L}rzNt_ znq@Jz%H7AOut4w9As7&I!f;U=x+_wGk@ikb(fAPQs}!zC`uViz23Tzm0`f>}X-Q&@S3u;{m`)E` zWRYoy$v0Luw)FiU0dcbtyTWKPtO3TMCj6=zhetk^VjzTp8W|Z8);zQf3JJ$goeD}y zlKR+uPl%0;ZKf<(r2Nmd~l(>{E0odO%kA<>>#OgB`vKOA1>h7v002v zOhdR)ZDa^&XlPCEx!QziR zmu!3g2p(PtO(uQVONVC=svaHSL0H+@%b+Edl>YU;WH=P$YhJy|imVA2Io;G`4dGix zeB^Qr{wN(9-W~V-)TvX3RcB6}vV*RDPebk^PEJnT_2Inod5$#*`AeZuG4}Oq>D#w9 z4c}W!naDcL-~XNu?mF4Ko=M!zd2z)k@Yu0U(a#7##Nd`VqLy@7c{!59qCK|u_R^8g z;XTn}nuqA!MrF`YO2$^hW9VpkCR_FF*{1H#_&aG;RRM4;zv(cV$zx05k_W$hsUTv> z;@tHf7zH|ecP;JmLm2t;3J3(lD`tOx)0~pWS6jnpdlzMc$>Z6xXAdhkxA`V*dRAY{ zeaWX!pF+`>iz;bu7Kh@?>wLGl2pwtejf|FT~8uJQ%`WYn8pbRwgw@%WF{T zX~K#?4CKg7RAfQ;Q9dFnZ4VneyRMCmL|Iu`X9O=*Qd^tzJO}EiRq!$zf}Xwutb$xr zHl*m=LtE#ZhWAbDdvS4d@ZC#YTeWFx)+FD)JsaPe^~JmYZBZolVKRhJUf(~ewSWKq z9plkaQFc3a(0jjzhuIu;xC?EDo|iX7om+};TnS4{-qT#5iuF<_KBIm!Xhv36))J69 zZeKo_A->U~X7ZOj#%mobh0 z?E|-8S`Km3j*X4=#}(x#|Hx3U9|=Pg?{d;a|SJ}<6@d!HOo?C$Qa zsjH(MlhEXuGiOVv$;pDc`L?E}ri1YFu){-xLDjtB;o(6PX(=Fexh%9j*9qJo=*K!B zG2q4xCvn<_5A=ASI1O)QcxMaY$+TfDjln7`5aS;XBOl*5<}=pwvNa{0?YXgT{dzq} zi4H;o(;l)VkVy)Pj^^6ug~-Sxl$eFq1bjfooJ__UWUUuK44+5@nwwJ>$R z>joN0c-d|qxXlIceU+Jvwc_gGp{EPNn4f=3k~W@R8`K{;DQfk5@=2)b_|%Sk&$>?QgzEK7z2Yu-(Js z(q2&eLQeef=~GymL}P*HA~zTCttBhbI z3{gGQ)zw>}gHOYLL~d&9>RZ~=)1rTg`*L@yI)@PGXiqtK)&2wpq z!M0jw!SH!x1ad*EL`Eis{KMWx2r1gIVxo&bDmpqC8lXBn%x$RF7^%{iFU^0iscu8#?W 
zh#wpq+J2YJ0WG3<$csx$O0=C5;^X6uEiEnATIkchZ1eKUCjq$)hJea_qXR8Ma44Kd z@jzA7^8Hw6=MEh!D=S3*Lyu!keT^G^Fx|!f^rDvHgNh0^e}De~fDW>SEbLF7#}@MO z^$o^0Sg$pUot+P^; zlvL6ZJ&P5BvOGoel9Q7esWxDSQ+YX3{%Ov1v*UwsRAHl-jf{*K{D@w28&m-ic@wjp z8kZ-pi4h*+N1bctshOG>eCa2ZLX0 zTbs0+ZN)P>uBWu3Voko+PztsL%t0P221G|kQ`%!Ndg}dvt;4Y1(2$Q}#dJ^*j$!@iVeohSu*05 zuy?ghp`;A}Fw-f_{Za%lt^V*Xf04m;P%a#R!wZiwwzu0g(V=+(dddOv2-rT`v zy=_}tN5`dp*RUP|L?6ngf5l4oPJ^J}V5kgn??(E;0^#N3W27uGhq$s_*3s3**9i%O z6PkMd;o)r8uV3fk;|oB_Ooo2*xCoOUrvG)ZIf!UsoxpOFOYVIQs$Pc=>+vuL1qD5H z(&_B$<2riu=>GT5SqSXR&h|TdRtO997@qcAB>qQ_9w~2^w`tVZ*H`!cB#Xt(qN%CL zDZX;{+88l^16kc2J7mey+x2Q4jcaqosBAbb0D?qu#h_T)CP!OBb8}TuY>14QUcSs{ z*7dkoOib)}Y^;fpy@_V(LR;szh2ao&9PV!x#XBrjRxUfQy4Ur`cVFQ-bBJl~2hO-U z1Kf4m>8{A4qN1`J4h%_KREqmv(#=LEyAWx05eSiGPZn@Te@L$W)P?q(`>4mEk&odB zC{obfU!q5sJF-jm5J&gZ3z2xMeUO81PUflIyESj0+~g>dYUlW(h%hX)P(+TGW|lNA z8_%p85qUvD%v1^PyKCn|UWKi&_th+F5YVJ}-59NonOuq*#5`st#ZzWl5aX2+7G&BCrU@Su#|1 zm|i-67v9q9`F*2y=3ADS;u48v)5h zDY^*XyL%TUL0@AilEX{SFq+ZJ46WM&&9@zEQ&^CdVo#n71c&na&6|4r{WLC%ioF?# z7fY3tVtcc^ir{WVQ85yl?3B9i`0XaqIEcVge$WjpFDao`uU>8K7M8{iT>|0NKI%B2 z6DYU&iX8^q4WA8Rvr?3VhL_X}=bbyTN6QRbdU8-G85%|m=Ni%IaW2JQXHcY~3#)M0 z^9>4%$B!Q;c&ZiGn(zBOc^KMrT4{?A zHfMK5=H%oIzqJY;8}q_XyL-TKtNgd`p8)5}YioI73hGCbDa9TcLX>QS?(zrr(;xh+ zKkWo$8iNIi(r-_M6>JKHAtBAm$r*I#j#^~r`RwXXsb!!y+CF~d^ziunbII<+K_CPg zUep^i4<0<&l<&p_nbA^ozzGcT4E|P$vSav+#-U+@@!37Sm{z^)E3k&0Mpr{!V7GmH zAZnXS2v(W*3k$`7$QY%*Jy7E{yV1@S2=J&Q6Z>$o&CJ-C(*ExGW&G1%*0qHjTDkO6 zss)6Ewu6Svckg1?(A3Pq37f5pJ#B_mhQ65bV4DXcCI@S&_*dID(r8&L-P)P)uxB8g zE5N(+$^Zyp9dGXyS2kXdCr_h3>8xyOLx#L9*?9nPGOJdJBA)a-PvyX)jv)45Mztpj z%uXxPvhZC5f#768J9Zsq^IgK{<7=fT9Dou}Hq|sXD;5?Jky)~Y8&S5LfU%S{98r>5 z#pV@lb?Wrd&po(<>W4=ZNNYT#C#^VL-WHWl&GjuR(1&M%6!}_!gVwID`qrb*c-c-n z?boBppN)%S6BQMG-O%7J=aq8D07WEU_~qwCO?c(t?`kb}L+up+DybU)V)fekm9M|QKP)=>hP#BCnwqSvtdx=xFRC0D zl^rQFjAUZ%>e5WYA|uP;?GXxb?j+S%7&v0b&iv?N~g+r>mCLJD#+^~n@K~>Dk#bxHWevadzbBdQa zyE)_nzv@=o6(3i|AEU0ESNy$BoZ{s*+4lMKXFc4+9Ec9aUA+ZY8(^fFtzAb(74)S{ 
zH-cWEq@<*@e|s~EM}6Ju4mZ`Ary`Tm;62pN0{9Y^pRbNLmy(yiR_zdkIu&V%V07g^ zTp%2_mmm2tdGNMVqad=n@+2uSKlb&-f*pW|?#Av3=(%g-BS4tHvRzunvu5t%Gu6q~ z_lv;Wqd$D#zr(7p0%}W2PiI@OU;z-cl$;zJ4h<4=h>wpie`1-*;<2$Ya9E6>YlPKZ zrLez4fS>_6Ku{FvfBWVh%$)Lpm{mQODS(VUE9wUJV^;SLECvQmy&nqoP(C2iK-7;A zKXgNAgw$+#O$`qSs&b5kbaopU83iNEgE**xs3_tcfjSQ#KGeqXD7Wq>bM_CsopiEU z!mFx{c;@QnRtk;7r8}!!xOlN!!K2?f8F8ktBOFu|~ zjdPJ#Qxm%XZ}rXlB-|9yE+uai%vDxNd{d?{B8JUN!aEC;UzG-NR`F){k|n`k}f&pZ0J zxBe>W{N~SBy1)g&=(*DlJZ~i++~BGk+wKWgR#tk>5vkYv<(d5X6S|Hf7XNX{F4qxt z#|I)rT_Tug&Qu#0b*5|n>(y$>pqlZVsq@DRfBrXm{jZAjm#&9)Ddheeu+Vsp??OY62_zW7Xjs49SwC?7-JJ#svL! zk!gpfh-XI!Sw1#YDj|?7C6UxIycBsCr zectptDuG*h_~=n7@d3{h-gZ*}@DkZ3OYIp^*IZxfjGKvnxm&zoQ(`M5ZvB9d!EV}vqXeZ3 z?_bWBFJI!ep00!tvL9SXaJigd7Mcq@%YkAtCJw<>whbMS(A3m<iaU&yUn zDkzhfoSd9+*$&{5K?$CW_dqP!-BPTHE^6?J6DQ=eqW^kS1{bDwL!GwxY9C>wI2w`- z*bShVQO7D=rZf`bfddD?ykrgM;|d58mXnhcUb%lE0T^gBmXwz>Y5kbE)ik7}qyzvf zgf(rqZ(kI^ZFyzojG5eGewQxIMH+l1vh5@QSGIk5HVje;2?7&(1|*yuC~*^XP@Mzl z6967IMr;M6kdP3k$TC*&X5~0ho`dja`0QPOKiGx?APjktpq3D130td@SFbGk$58r_ z3PfvSkO_~m6_6Z*05gH9N!mbpoi?a}SR05?4;HnpwbhSY-@}Kj6pyeltBZ@v1P+lC z);iVWN@}XKtSrrW2_7?C*?=KrkB@=I*Y%F+trQ zhK48vFX0~>#!y8aPe|az1vM3Ta$wh}9~xeW^%{j|)QUZGI!4oHZ0VuVeh{R#U9`XGm6>MaQFc# zm7IeEHVLZ^a^t;Fl!u~IL4f_i#8ancqst>TcPPoHuFEUy73+pDpB@nS9t?WU{p$8P21l!Dl)z5eFM5wmk>7eqxx z;YKNJw&oELiNF@NgSEIYMogb*z>2fvD+W zy5WAkxOnw)rv~O6O9iLTgM->&_Ku%E9gN)(-0?DV8^`wT+uy&-%gY;sRT4!3$)3oo zs;ZhxZfE12D)LnGV#rd^(9kfpwzhsR$lG${+l?ox42l-lL3d>5<_3ce*>Lg7mHGJR z*-y=ejwZtSE&Jm1w(W5`X&?*3;yI*UVRA`7eclW_bnBu+0>nA(@mEoK6%~~Yi5hcY zE>Q-wQYX&M%d0xw3v|>b<0NC>zFopoFxaqxhWh-p$ih35OiXV1>&S?p`Wj!JjUj8LoJY-)N<;~gq#*MYa11UmvRyH8&9`LynI$G744 zfhedR=N|m{nwOfLXHWupbx}^%L7tNps>&H)!cmT59E6Di%Y9Ab zDflA$!xie`G#RH1OYf4VCNaR7{_UvcD8ItOLiuM~OqSa1*|X*SU0YlGq&y`H0DM#R zN))`nrd}f-=Kzy2P*}|~2inqk*CjWM3$dyGZmKP`fl4-uWBONxc%1kl;rCLhi-+{nE#e zIq)ICIOwz#`?{g&X!s8H;PO5`KDbP_9Lp%IQ9HPZh8O?RIi9HD9iUh| z#)uSE^YUfLg$v>sV2D8=v9z*!eBEcP0AB1*5;a9}D|sR-&^ob7T_j_UUR&9_i$}S^ 
zcI@7}w;XM&g2_~w|EpZp+GT$7f0wIzjCNmJMd86;ivvYYdEM>PH8$p1zVCI?u-J?l zGbq2RW0Dr$O&M0I^sDW&dU0nhysc4c?A6%iw2=REqj7ySr;}5k;Wu=42vsk(>HsHt z%euJJurTmPMzMp?24dkU(ak27re^*#rX#(Hkrg1_sKZNfH6%Gy(DeP0tcsh!Tbn<2p!>+P;h#RSVd5J5Y^~7O3=R zn(2xspP=E*2UD%g<*UKE*`sNvM2-=Q* z%hKh`D?yNAI3onG%K<4^*#lc9zo5VY8_dAaFyP+3!@0H=GAKfS#%KE-JA&{D%g{7L z7~TEy`W!5(t94@VD4=Z2$1@^=gIP#}1}R>yNjdq7{27ZUPoJ8<{sH3tbQUW@Yif28 zAP)ftfUUzuKLBJHyzb@($z)~DPFC>s_4V@hCfk8RT#rKe(}os}x?6itlTCs^#`3z~ zPIFF2jTCkyVN&gYrwP4y^!psb2iE4JphOcbrJ)_^nia9Jqvgl-q(MA6g@Mn}HMd^UiYtGm0dzJACPJ$4+ACJkEB!UOYB zQdz0rFbeQb7AJdFJs=dGG7Zctsa1Mzw4LF?+i`^f1*17$iy6J^w)Z9QUWU;FZ*DE1 zS|KDMANd}0f-Dqm?b`db8s_Haq#s8X!>LoUua07IsGYZs1#$eKva&jNpz+Qusb;M^MU|A6wnaZ%GY#a8Uf>-u zL&Aa4lLXDfd6gBcC6DVx8IsWTGLzT^&J3xo`nsb!w0A%>lWl{3?(-2mG!5@h%t(bN z&n26Y+5lJ>2c#n)|7vNjU4@>P)p_>b)IA9xWzQP<+D|r%g#erE*TY>pC5$=>!UC0W%+3O`h<0>e~PmS3hxsH8bRn< z%mX5bt9CeIrman3ZYT93l7iKtOpCKB`_b_n?Qf2aiwno6)#YK{V~cH{l9Ez$bLV;h zr)EMoi|Vu$q|e788bD^0im}gK&O^LTi&0hsTfcmQ)ES#bne@YF98< z7V*D;4k-<-)9c(AkDCW&4r=1-Hbo-%vdnQ*_uF{~pVgRL)=pE1v2AU)EA;OU~ zYe1RS#NNGF_6rv;4#v+!GBUy3)a%!l8ouMRQOPQ+=^)V`7`O4W+Q)tVO+wK4vQ}0>M`ob5`RceTxpx)#qUbomm2= zi)kh|{mrX@Y3z7D#N*vlw7(c|=PNKPl$4c~H7q7Dix)MrnYZ=MQ3mpH zK6(5&UA z06SA1_~*ithzvJ$V6F)eQWrpX5Z2FSFJADYBcFai0^o>TIjm56rjz?B$!^2egsw{PDd1|axVetU9!iwqz2G*L71#VpLc zys`3A{83ust-QQ5ZzNjH0POOQOct1aj>c*x2m!@c7ds!oVQWXn=J&5bxEg*#yyF!V zG`}QFrrV0HfW$G(_-cC^^<_AXapufWk!LIvaw&qw**%9R_ln?qJ6?m%Jw}Er^{dRk z9GzL{Q{Ye*XM@#6ZS>9cWaaH>tD9+4^Wwg!B&ym^@A-TqY8HN#)Nv~_i{VB=DX1F7g8bAxFu!vGo~DdRe@Bp4`C23(9pHF>4Z z>cd(*9M6paI`Ggk*tDrCXM0thcuWF{zI7KP(ol&=`!c<4Y_t-$ujiO0p-}=dqP4Ft z8uZn*RbtZ+!!Zd^pGQ_2ZV&opi0m#7pX$D>8$+Nkh{J+Uroub zgn|bY8#M@AUj4-ub=}+cN2WwCpYhv&13cYRd>TWWCs|1g3NKog6HMcJ8 zyN_c9f(KcjF%h2Kui6V*GDusfZ_Zx3YDQC zfu1v|^QELH(6#Q9eBsPs0`d=!I_G*|%dcO#t0IrNeG7^<>X2c zvnUD!7w#PxRidcO%uLnQLRjTD^LH|V^5egZvhn#Aa=#Jj*SfhWA?%S+F~G1^3r|0_ezq8t5J5qOuS!QS_B5FgPmR_$vYr;p>ZOclrj&%9|KMRW&b4vLR<`Su|vx)y`Kk*;ym*dy)s=e 
zPyZTQt-5)t`8A#Drhly)%l?0BWBmFKy#Hxr{nvNI@Bcq_SpI%#O+T1QP8}KQzR8nA P4hpPWvyqmmYZvf8DDj|R literal 0 HcmV?d00001 From e9c77d9ea9e862e4f7cf862608c69e623e22e52a Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 3 Feb 2026 23:33:08 +0000 Subject: [PATCH 077/158] working through --- cpp/src/detail/jit_lto/AlgorithmPlanner.cu | 4 +- .../jit_lto/NVRTCLTOFragmentCompiler.cu | 12 +- .../ann_ivf_flat/ivf_flat_udf_bench.cu | 43 +-- .../ann_ivf_flat/plot_udf_benchmark.py | 257 ++++++++++-------- .../neighbors/ann_ivf_flat/udf_results.png | Bin 217724 -> 210923 bytes 5 files changed, 187 insertions(+), 129 deletions(-) diff --git a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu index d28110c370..948ad66466 100644 --- a/cpp/src/detail/jit_lto/AlgorithmPlanner.cu +++ b/cpp/src/detail/jit_lto/AlgorithmPlanner.cu @@ -69,8 +69,8 @@ std::shared_ptr AlgorithmPlanner::build() // Load the generated LTO IR and link them together nvJitLinkHandle handle; - const char* lopts[] = {"-lto", archs.c_str()}; - auto result = nvJitLinkCreate(&handle, 2, lopts); + const char* lopts[] = {"-lto", archs.c_str(), "-O3"}; + auto result = nvJitLinkCreate(&handle, 3, lopts); check_nvjitlink_result(handle, result); for (auto& frag : this->fragments) { diff --git a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu index 06330f50ad..c53dc1eaba 100644 --- a/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu +++ b/cpp/src/detail/jit_lto/NVRTCLTOFragmentCompiler.cu @@ -30,15 +30,19 @@ NVRTCLTOFragmentCompiler::NVRTCLTOFragmentCompiler() cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device); cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device); - this->standard_compile_opts.resize(4); + this->standard_compile_opts.resize(7); std::size_t i = 0; - // this->standard_compile_opts[i++] = - // std::string{"-arch=sm_" + std::to_string((major * 10 + minor))}; - 
this->standard_compile_opts[i++] = std::string{"-arch=sm_75"}; + // Use actual GPU architecture for optimal code generation + this->standard_compile_opts[i++] = + std::string{"-arch=sm_" + std::to_string((major * 10 + minor))}; this->standard_compile_opts[i++] = std::string{"-dlto"}; this->standard_compile_opts[i++] = std::string{"-rdc=true"}; this->standard_compile_opts[i++] = std::string{"-default-device"}; + this->standard_compile_opts[i++] = std::string{"--gen-opt-lto"}; + // Optimization flags - NVRTC uses different syntax than nvcc + this->standard_compile_opts[i++] = std::string{"--use_fast_math"}; + this->standard_compile_opts[i++] = std::string{"--extra-device-vectorization"}; } void NVRTCLTOFragmentCompiler::compile(std::string const& key, std::string const& code) const diff --git a/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu b/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu index 0deb101cf1..adf4481d6b 100644 --- a/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu +++ b/cpp/tests/neighbors/ann_ivf_flat/ivf_flat_udf_bench.cu @@ -25,7 +25,7 @@ // Define custom L2 metric using the CUVS_METRIC macro CUVS_METRIC(custom_l2, { acc += squared_diff(x, y); }) -// Raw UDF that directly implements compute_dist matching built-in exactly +// Raw UDF that matches built-in structure exactly (struct with template specializations) inline std::string raw_l2_udf() { return R"( @@ -35,27 +35,38 @@ using uint8_t = unsigned char; using int32_t = int; using uint32_t = unsigned int; -/* std::is_same_v for nvrtc */ -namespace std { -template struct is_same { static constexpr bool value = false; }; -template struct is_same { static constexpr bool value = true; }; -template inline constexpr bool is_same_v = is_same::value; -} - namespace cuvs { namespace neighbors { namespace ivf_flat { namespace detail { +// Primary template - works for float template -__device__ __forceinline__ void compute_dist(AccT& acc, AccT x, AccT y) -{ - if constexpr (std::is_same_v 
&& Veclen > 1) { - // int8 with SIMD - use intrinsics like the built-in - const auto diff = __vabsdiffs4(x, y); - acc = __dp4a(diff, diff, static_cast(acc)); - } else { - // float or scalar int - simple formula +struct euclidean_dist { + __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) + { const auto diff = x - y; acc += diff * diff; } +}; + +// Specialization for int8_t (matching built-in exactly) +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(int32_t& acc, int32_t x, int32_t y) + { + if constexpr (Veclen > 1) { + const auto diff = __vabsdiffs4(x, y); + acc = __dp4a(diff, diff, static_cast(acc)); + } else { + const auto diff = x - y; + acc += diff * diff; + } + } +}; + +// No __forceinline__ here - matches built-in +template +__device__ void compute_dist(AccT& acc, AccT x, AccT y) +{ + euclidean_dist{}(acc, x, y); } }}}} diff --git a/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py b/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py index 5ee8778bff..aadcc38d0d 100644 --- a/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py +++ b/cpp/tests/neighbors/ann_ivf_flat/plot_udf_benchmark.py @@ -22,7 +22,7 @@ def plot_benchmark_results(csv_file: str): # Create figure with 2x2 subplots fig, axes = plt.subplots(2, 2, figsize=(14, 10)) fig.suptitle( - "IVF-Flat UDF Benchmark: Built-in L2 vs Custom UDF L2\n(1M vectors, 512 dims, 100 queries)", + "IVF-Flat UDF Benchmark: Built-in vs Macro UDF vs Raw UDF\n(1M vectors, 512 dims, 100 queries)", fontsize=14, fontweight="bold", ) @@ -30,135 +30,174 @@ def plot_benchmark_results(csv_file: str): colors = {"float32": "#2ecc71", "int8": "#3498db"} # ========================================================================= - # Plot 1: First search time (JIT compilation cost) + # Plot 1: Median search time comparison (float32) # ========================================================================= ax1 = axes[0, 0] - for dtype in ["float32", "int8"]: - data = 
df[df["dtype"] == dtype] - x = np.arange(len(data)) - width = 0.35 - offset = -width / 2 if dtype == "float32" else width / 2 - - ax1.bar( - x + offset, - data["first_builtin_ms"], - width, - label=f"{dtype} Built-in", - color=colors[dtype], - alpha=0.7, - ) - ax1.bar( - x + offset, - data["first_udf_ms"] - data["first_builtin_ms"], - width, - bottom=data["first_builtin_ms"], - label=f"{dtype} UDF overhead", - color=colors[dtype], - alpha=0.4, - hatch="//", - ) + data_f32 = df[df["dtype"] == "float32"] + x = np.arange(len(data_f32)) + width = 0.25 + + ax1.bar( + x - width, + data_f32["median_builtin_ms"], + width, + label="Built-in", + color=colors["float32"], + alpha=0.9, + ) + ax1.bar( + x, + data_f32["median_udf_ms"], + width, + label="Macro UDF", + color=colors["float32"], + alpha=0.5, + hatch="//", + ) + ax1.bar( + x + width, + data_f32["median_raw_ms"], + width, + label="Raw UDF", + color=colors["float32"], + alpha=0.3, + hatch="\\\\", + ) ax1.set_xlabel("k (neighbors)") ax1.set_ylabel("Time (ms)") - ax1.set_title("First Search Time (includes JIT compilation)") - ax1.set_xticks(np.arange(len(df[df["dtype"] == "float32"]))) - ax1.set_xticklabels(df[df["dtype"] == "float32"]["k"]) + ax1.set_title("Float32: Median Search Time") + ax1.set_xticks(x) + ax1.set_xticklabels(data_f32["k"]) ax1.legend(loc="upper left") ax1.grid(axis="y", alpha=0.3) # ========================================================================= - # Plot 2: JIT overhead + # Plot 2: Median search time comparison (int8) # ========================================================================= ax2 = axes[0, 1] - for dtype in ["float32", "int8"]: - data = df[df["dtype"] == dtype] - ax2.plot( - data["k"], - data["jit_overhead_ms"], - "o-", - label=dtype, - color=colors[dtype], - linewidth=2, - markersize=8, - ) + data_int8 = df[df["dtype"] == "int8"] + x = np.arange(len(data_int8)) + + ax2.bar( + x - width, + data_int8["median_builtin_ms"], + width, + label="Built-in", + color=colors["int8"], + 
alpha=0.9, + ) + ax2.bar( + x, + data_int8["median_udf_ms"], + width, + label="Macro UDF", + color=colors["int8"], + alpha=0.5, + hatch="//", + ) + ax2.bar( + x + width, + data_int8["median_raw_ms"], + width, + label="Raw UDF", + color=colors["int8"], + alpha=0.3, + hatch="\\\\", + ) ax2.set_xlabel("k (neighbors)") - ax2.set_ylabel("JIT Overhead (ms)") - ax2.set_title("UDF JIT Compilation Overhead\n(First UDF - First Built-in)") - ax2.legend() - ax2.grid(alpha=0.3) - ax2.set_xscale("log", base=2) + ax2.set_ylabel("Time (ms)") + ax2.set_title("Int8: Median Search Time") + ax2.set_xticks(x) + ax2.set_xticklabels(data_int8["k"]) + ax2.legend(loc="upper left") + ax2.grid(axis="y", alpha=0.3) # ========================================================================= - # Plot 3: Median search time (cached) + # Plot 3: UDF/Built-in ratio comparison # ========================================================================= ax3 = axes[1, 0] - width = 0.35 - for i, dtype in enumerate(["float32", "int8"]): - data = df[df["dtype"] == dtype] - x = np.arange(len(data)) - offset = (i - 0.5) * width - - ax3.bar( - x + offset - width / 4, - data["median_builtin_ms"], - width / 2, - label=f"{dtype} Built-in", - color=colors[dtype], - alpha=0.8, - ) - ax3.bar( - x + offset + width / 4, - data["median_udf_ms"], - width / 2, - label=f"{dtype} UDF", - color=colors[dtype], - alpha=0.4, - hatch="//", - ) - - ax3.set_xlabel("k (neighbors)") - ax3.set_ylabel("Time (ms)") - ax3.set_title("Median Search Time (JIT cached, 20 iterations)") - ax3.set_xticks(np.arange(len(df[df["dtype"] == "float32"]))) - ax3.set_xticklabels(df[df["dtype"] == "float32"]["k"]) - ax3.legend(loc="upper left") - ax3.grid(axis="y", alpha=0.3) - - # ========================================================================= - # Plot 4: UDF/Built-in ratio - # ========================================================================= - ax4 = axes[1, 1] - for dtype in ["float32", "int8"]: data = df[df["dtype"] == dtype] - 
ax4.plot( + ax3.plot( data["k"], - data["udf_builtin_ratio"], + data["udf_ratio"], "o-", - label=dtype, + label=f"{dtype} Macro UDF", color=colors[dtype], linewidth=2, markersize=8, ) + ax3.plot( + data["k"], + data["raw_ratio"], + "s--", + label=f"{dtype} Raw UDF", + color=colors[dtype], + linewidth=2, + markersize=8, + alpha=0.7, + ) - ax4.axhline( + ax3.axhline( y=1.0, color="red", linestyle="--", alpha=0.5, label="1.0x (no overhead)", ) - ax4.set_xlabel("k (neighbors)") - ax4.set_ylabel("UDF / Built-in Ratio") - ax4.set_title("UDF Performance Ratio\n(closer to 1.0 = better)") - ax4.legend() - ax4.grid(alpha=0.3) - ax4.set_xscale("log", base=2) - ax4.set_ylim(0.9, max(df["udf_builtin_ratio"].max() * 1.1, 1.2)) + ax3.set_xlabel("k (neighbors)") + ax3.set_ylabel("UDF / Built-in Ratio") + ax3.set_title("Performance Ratio (closer to 1.0 = better)") + ax3.legend(loc="upper right", fontsize=8) + ax3.grid(alpha=0.3) + ax3.set_xscale("log", base=2) + + # ========================================================================= + # Plot 4: Summary bar chart + # ========================================================================= + ax4 = axes[1, 1] + + # Average ratios + categories = ["Float32\nMacro", "Float32\nRaw", "Int8\nMacro", "Int8\nRaw"] + ratios = [ + df[df["dtype"] == "float32"]["udf_ratio"].mean(), + df[df["dtype"] == "float32"]["raw_ratio"].mean(), + df[df["dtype"] == "int8"]["udf_ratio"].mean(), + df[df["dtype"] == "int8"]["raw_ratio"].mean(), + ] + bar_colors = [ + colors["float32"], + colors["float32"], + colors["int8"], + colors["int8"], + ] + alphas = [0.7, 0.4, 0.7, 0.4] + + bars = ax4.bar(categories, ratios, color=bar_colors, alpha=0.7) + for bar, alpha in zip(bars, alphas): + bar.set_alpha(alpha) + + ax4.axhline(y=1.0, color="red", linestyle="--", alpha=0.5) + ax4.set_ylabel("Average UDF / Built-in Ratio") + ax4.set_title("Average Overhead Summary") + ax4.grid(axis="y", alpha=0.3) + + # Add value labels + for bar, ratio in zip(bars, ratios): + 
ax4.text( + bar.get_x() + bar.get_width() / 2, + bar.get_height() + 0.05, + f"{ratio:.2f}x", + ha="center", + va="bottom", + fontsize=10, + fontweight="bold", + ) plt.tight_layout() @@ -167,35 +206,39 @@ def plot_benchmark_results(csv_file: str): plt.savefig(output_file, dpi=150, bbox_inches="tight") print(f"Plot saved to: {output_file}") - # Also show - plt.show() - def print_summary(csv_file: str): """Print a summary table of results.""" df = pd.read_csv(csv_file) - print("\n" + "=" * 80) + print("\n" + "=" * 100) print("UDF Benchmark Summary") - print("=" * 80) + print("=" * 100) print( - f"\n{'dtype':<10} {'k':<6} {'First Builtin':<15} {'First UDF':<15} {'JIT Overhead':<15} {'Median Builtin':<15} {'Median UDF':<15} {'Ratio':<10}" + f"\n{'dtype':<10} {'k':<6} {'Built-in (ms)':<15} {'Macro UDF (ms)':<15} {'Raw UDF (ms)':<15} {'Macro Ratio':<12} {'Raw Ratio':<12}" ) print("-" * 100) for _, row in df.iterrows(): print( - f"{row['dtype']:<10} {row['k']:<6} {row['first_builtin_ms']:<15.2f} {row['first_udf_ms']:<15.2f} {row['jit_overhead_ms']:<15.2f} {row['median_builtin_ms']:<15.2f} {row['median_udf_ms']:<15.2f} {row['udf_builtin_ratio']:<10.3f}" + f"{row['dtype']:<10} {row['k']:<6} {row['median_builtin_ms']:<15.2f} {row['median_udf_ms']:<15.2f} {row['median_raw_ms']:<15.2f} {row['udf_ratio']:<12.3f} {row['raw_ratio']:<12.3f}" ) - print("\n" + "=" * 80) + print("\n" + "=" * 100) print("Key Observations:") - print(f" - Average JIT overhead: {df['jit_overhead_ms'].mean():.2f} ms") print( - f" - Average UDF/Built-in ratio: {df['udf_builtin_ratio'].mean():.3f}x" + f" - Float32 Macro UDF avg ratio: {df[df['dtype'] == 'float32']['udf_ratio'].mean():.3f}x" + ) + print( + f" - Float32 Raw UDF avg ratio: {df[df['dtype'] == 'float32']['raw_ratio'].mean():.3f}x" + ) + print( + f" - Int8 Macro UDF avg ratio: {df[df['dtype'] == 'int8']['udf_ratio'].mean():.3f}x" + ) + print( + f" - Int8 Raw UDF avg ratio: {df[df['dtype'] == 'int8']['raw_ratio'].mean():.3f}x" ) - print(f" - Max 
UDF/Built-in ratio: {df['udf_builtin_ratio'].max():.3f}x") - print("=" * 80 + "\n") + print("=" * 100 + "\n") if __name__ == "__main__": diff --git a/cpp/tests/neighbors/ann_ivf_flat/udf_results.png b/cpp/tests/neighbors/ann_ivf_flat/udf_results.png index 79696d7a9f06d293918cd158548c240f42bd2543..be6846656a5d4d9743f705dee0746e3a90eebe73 100644 GIT binary patch literal 210923 zcmd43cU;eJ`#%0cku8c+$tb0vMcN_Sq@lF5H8ixPDI?OLjfS+)Qd(#rNu^TJRB1~k zP3?Zi<^BGC9{2ru{QmwvzCMrp{=DDrdcB^{>$=YKJkH}d&g=G>({dZv?OaEpP&O*a zpHQPvXxJze>bEqj@i!_C-<0tG#GFo^b5ggv;N)uLU`9D*s*6RS$j!d+mS-qv6cL1g?U`P8f68A zqHyAv#^vbYc2_;R@@47YO?B>k#w&O@r~Qqo-R~6b-=3EBCqv62V-y}f_GUd$fe#e9k8LDR(8_xK~?LgKGo@qZH?jN!X!R^IwweU|>2b7^TQ+vETzCuhmS zDTN-XubEPwb04S84Gkah82tUOI4Mu5EAEGVeSJ+w#{T@N&7b~v{ZxPP|6R9rJV~e5 zhAZB9{+wSxC2;3q)2g;So74%hh=clE+;$cgw+3qC?zn}uKB|h75<4RyDe0JFtD;h? zmpQL)Kin)(=6MPKoo?ndYi-Ht{L@m3BRM(wc~;id*RRE&olU(N#3F7yH#sQcHlA2B zqV;pbmnps%HebrGR99Db_4Z!pI{WOA`=4LuiakX4?%U^Bc>1wQOrE3MoV1XTkcf!L zBahkfAI(|&w)342Xv#DgaBa9v;asz1v2@{6)IGD=p7P*{d^xF)HKW@%+H_7R=6D|5 zuyLc&g$t|2#l^Oe2xBVm$G2#rCU{zd_&*7Y)X=xW~;23q_tXoZ#674GBUnx zY}}rt7=C=HF}bx&>rX40UpVr{j}!><^ZPt~dLS)5T{Xu-RU<=> z(WWi0thXXG%X``D$yckG*37EW4L05zy4m*bee4_X_wY?Btf!&5n>5l{!aCYf)bMoS z=hsv-%To`9Pu|;m(xF{=PCBk<E!_|!chkYhh(?y2lZy|>BvxD)k&8%Hr|VM zADbC7(JQ=6`%u{CU5;hLnW8JsGjnss^V1`+iPo@U%T?{SB|3a z@$oH8HmJmFX8)-NZz?MlLo{Dps1DBl^^H?Ej|c0wUtwZ=yfUS6A-~XLb}Q2n+Y`aN zBfyry*U+Vgty{P9JO9+cznV<` zsGDtPRvZ}XDUaeeT1hr;YN#pUT$V8&kGx^9|Gx*p(aGuk4g-&?*dE1tt<3W=W=G#W zIDFC6(#6HaqO;gDVrXh?tRmZlBU9{5VNp?Cfqjqigov0}f5ocwoEs*KRho|Z(hL)C3C&UC@J|3vJb5Ei^^G~VYW|1qCoac)@ z4pi-6VEE`m^>{hcb+qGoUfya7Ic#5^s$ZV@9naGlz$nCT_=b{RnmnYA)vSAAUMpb` zT>SOf+4Z?rUn@s1ESU4^F$FEbN6&l%qCsxdiUdV{3^uy~@rIOwfp_Tj^a{q7r=0}k7MW9aDU*g~};)1c%7 z&U@~XER&cMr?Rs0czpQssRy04_V)IdC;B(m#7PI|+jUls$4GhZbRKFf!xyMNIkP8b zNM%`E=3%y3)rHJDlHgfs|EB97p-m|&tlCH4;pgoC{L&9QsIPNTG|A_>ZTqXYpFZ(a zIpq$z%QcCaQf#-rAaHX>KUf!-~Fd4T7k}&iDQM z6I9gHMwMX)W`1{FGi|xLY7M_xCEdcpM6KA@%t>8;!KN4HDi!j!YuB!|Xv&Bge!U;7 
zj$8?RcB@yvs!om`b6t%xSC=B2 zZy-rljrv?w1aDV&_g*D~qAR(Lf+;F-AFi$0Y&sa7v8Ng@KUU7_ZTR`ou_wCu2k~61 zcI?;@6&3ZSzJBYC^*e5slt}MC9ewL!Q^tWtS_THr8i9<&%VXWyFF#)M^&P*IIvx=p zzvq?n&`q4y(zZ6;JdcLtQ#6}4)%Zp91%-#xUzz&JuC#;dSR>=Tx`#*X{-z4-&aYp; zp6BFTfB91MTYGz5o{cV#Zr%n2^!>)J(b3E)M~D7TjFZ3GnRe~k#eewljf8}KI}cl3 z_g$~das-mQeGW8Xiyxm zv^Zbd(6H^og$rW2+&nzWc`*-6IIyOjo_|=lG@ekDl#~hz3LaTDBsXW7_$3B4H=kP= zui|^6nZ6U5#n8m$8dAAQO|N7S+Zk!HHiB`2rrib?gBCmsY$54WsB&1(^x z`!mXCsXOO*w|~?mrYA5kPyq2+X4ci!m7PP1d+RgaAG|M2_NF*r=r@V0!kU{-&CbtP zjkAcmC`YI?Xm8uG!|$czz`LfV9n;g({6a!9Pt}t6J!URxzd7d;EnuLh54uG2r1 zSrQRGivq*$hP3v($-q0_q&$dIJ$VCpO;b}U6wxpq?X5@z$B!PRplsaY*2?HDSZp=j zz@wXEagV1K6`n1DV#dM1d*~1yw`SU#TeSPW4-EJrLLZ-rKTb9Z8>*+TubOh2C4fO- zuWo1PfddDc3*Cg~riT3d>ABxkRIJ^Mlg@uLh-Q6o?WMnj+s+}BiITa&Dk9@nMM4E?}x9iuhAgremlH%n*zhY|I zCJx}c%N`xOhTWhp>+D#+o##!0T=00jnHs_-w5Y^!pyulX={ z7F#DW=UBUI=iA!a%_sfouHLvojqLRO=TCo!zRGxo(ESXOR}M918U!PY_f1`Rhhw&F zzw#Nq>F%=iWlo+f!7_fE!bf^GG6{biKNZD)ETp`(RmVP&9ucGmD6j)sl_pFlvA0Jo}l8lnjr(1f9{J!BTFad|cWkd4;V?{K6*&Z7CHMm1F6e z+>Mf^AGc_G+1c5Z6)M_9NqN57m9HVbdGqF$SF2X8bkJzROVZ_^RHz7Hr=PHzA8JSm z-onbt_tXR!tmWA%pXbJ9{?>L)bqR9xLtz*|w#pWk!N>Ft*Lq8RX_Q)YhtK zUYH&4dxt|n_a`f(yaGk^dTZ{*p~;tNdM~q2kLY9>-}pU}-?t_CATd4LNI;4Rjo!01qJhVf~W9Cu8FT|_)kjT{3)L98l!M32!pU=c2-`wA| zt##V+iu?4i>VjV2E=e7iVy(G=fysrZ_hKB&KLj#4I>**cOD0r#Ri2K~9!UvyKG<}o zwcN|gE89bHzza*()m?VS_x0=7jd`LXA~ZYr&VRC>SByN!n>vB`@ORs`Z5y?_ySu`9 zym%L?m2oV90;gA#`ScDJmYAMtZcnc()58ZC7#MD4B1~^Y@)`J>eX+{@J~F~PAHX@{ z&5rV1(>RUrQN*FtPj1UHuiX`IQTJHv>k+FjoZ4!fY@w%Tr=p@V zonuLy6*_Wc$A#M1^r=5jGO`_i)qSk_Q&>N6e6T>tt~`iEY;-hwA)+$y`**{ZIUwl7 z!zJ~zue4uW2$4Y5*%1&B;Izc-`t!?y@{y)#{bx?LMb1MJ#vQ0}+>Pm(J*X1Z*@fh! 
z_Dzh9-Rl$gUMgf$k(a;46I@c_9Xl!>IIO4C+v_@iX9pkE`t^5sYI%L{zA%Xa;`*Wf zhL-e4KVCeV{BrT!Ifm1*5(ku26cyDxd^vXS-p%)5I&KRc-F2_Uxjt8+bpZt4BadcJ zBChr!kXd!Y98u%_8y*sLC@VY8_+3q1z3!jX@VAjluD?*Ob!`g@JI+#(@#Ud>m)3h_<9<=A%v5L72 zbI$8|&X43rNqZMJa?DJ~C@QW40=Lt?Qsz%D+Uu|jNQ`Btu=SIf6NMjuQpfrt4fe9L zv&#+r=qxGWIW@?=L>3Xl6N-L&X@2Eh=T#%WjiUtnuW`F}|sb;=7oHtvMgseg}#OX~N3*mwy4U z74qCjA*a|fbUpKtBS$8OTht@G@Falx60AA|OH7VcjPBlwq~+<0&AyI;P-%N5{d|#* zuyTJ#k^9)vqPxNaAg}J{ldZ$;ua8t2U6u>n8O||rJq`#P$D;O-O6K_@(+TN;6Jyq4ThwZnj=Lr9JUH8=m2guZhme=sTVsX^b?}GOzjGRqEF*h>a`uoPVs7YbC}a^uXwDQBjd!+c?~c@+Y5m5ki}=p!Gi~H1q8fLQi{BU&V{Ef!5%#zbH36; zpxSqpmFv(6+|)_`fUVsE+*~2#GJK9=*>IDb%AlYikxSnx9ffZDD)O&PS$(=Mv<83Y zKjh%#bfP8Otn2%C3ce-k{{20`EPM9sDezi+#Wvdh+I6hWuEeN5Q9;bcFS#@qRgxf9 z%5FBc69c<_J?E$65%SrF*WD)ijlLDwi+#NpnXD3bFt*m%*mw&wGtc}m>1qb+lVCa^BfTtmPO(18XR1^YSpUiet!D^7~AZ=5571J7w|ccF?PQ*7uc_Dpr0KQ!=k?@9kRsO{h= z3S38Hd!xk(xO(~wX+IE21*^HjZxc&Mx+@x>-{{H4UoWaR6xf?wzj;%!{#W~JKfKJ_ zii!gb*`u8$wGQ6slDS-Ze;U8>S<(F`;by-(zlk_jibKKC#YOhD%gFmC#jDqG7%V$*kkyRWm!k2`Lbr_^VxUs{+@(VhcM@EJglrz&MRCjLE^M(Dz)k7t=~BR`L8 zcx|pDq*dZ67r!*Uf=lr{nd#~2)?8OfG8#W@*P(Y&oNzh@7t~k(;Cfvua&al_CVO>F z*v7VeyB#2ya`LzjA3n<nvyTdEA_=Fltg(!W?l?N)hnGA!>@ zXQ}+ah04{{0^7E3-P`XzJD$|-vg{RzuQfGm zk&Nk@z%3mZ?kLjz(Uzat{Zdg&F^;u*;IZCIhmgs$shVqX_(Ptq4FGTe!FDl5P^JQ% z-*vp8X=?`1Vb#x}=BxxB1E2@fSzyrXc!p>x#m(-RrH#cvwQ`4t^e}IYV*cPwZz8z)GiPEd7kIi}{ zRvFmVv>-8U!{@V2;R}?fMQ0cPbeavZNP7Vipl@TZy0e{^3+&HM%pm6U9sxf=&7?k$dQUvi4i+%FFrgx z%vP@aVFXM>qZscw&2`;j8+~`vL>|;v8_oduc6DUPU(Ar`=Lg=R+FNI=47+BF#hz znQw~V)!pyb?Fw>GURn9mc!e(3leFd`Dfi@pm~Vr8QT(}$YK4)JQTN9U6uOj~77fW8 z(J4JCILOM%Ix{n4+Pcd#N9o}K_2fsU6(QY2{nZakfncW-osdB5vdxsna{c_)pjz*9 z|H;4jk#6VI&#$ZQ3dd98pw?EiK(w@^C`? zp@dtu>67W>H`aAk@s+#>GssyH`S8Gr)f*Um{*qJxMf>hQGbcciRuZklKGE~mHa2F? 
z7u{dAe)zB&KO>7Jas2ixjc|?N4z?TcoiAj-NseS*2I?m{+f3z*c``sLrC#)-v=Oz%H@@Gofrjxp^MT>2*qTUrOkdCe(|xkiII5OezR$Wa{ZLZE^7;#pybJN**5 zmJR#08MU;u7G`>aLnewri2+|-xBB`_VH`DjJDPAgJgT#;`Z2A$-a}x{9;4hf7uY96 zO-~N0GmE=yKu1Q`kiD~l>Mm=lf`S5$hn9zN(TUqz83aw`Z69X-_;_!hh(qtK#=dl& z+;@*p-eD_Y60C0;qe%=j=6C(2r_g$P8^`;hTa)S{sP9KTSZHalBFQ8uhI19T|H%*; z55E}~7k9dZSBjl__S_!ya&rt)nFg)|A;tBB=tj_*Ank-qq9WZ{u@<2d=Del3r6nFu z>ef-5b~A}rUd9G}kufTGbCJ7i-pyMzRy*%3hkX1fU*;q({yMmAi==w$SJFHG>MRKc z_&;~<+^s64(0x@UW>wo`{Q81}gUhkaD1IZ#6V8M6)cyVaA%{RKG`F=?NnIPXdnWC* zaLzuX()8L8NR!b5pW3x1K=d z-1QV-^)bYlh`6}<{UeKd-&(Rz@+&Ul3#?mn)*5(>oecU}rQ)}l6^p@&2TPdg@Cd4j z5%V1ISf8rFrOiQ5 zA0lhd{jG+e8>0kFR&!~l-2$on$Z4?tiN@2d9-f{;EE&42DBU*@Vc!8KDL(6W9Q=;9 zGDJdD)SwvAqn@n%7M!nX$y>qDu|l`GNlWcr=`6am89 zK)C%F_~qwr6Bov&z-XPxHdVOn?w$ZR&Z+vC3XCpcfi#|JCP&U!Nh)zt**@1%7khw{FS)oxIB%gG1zApKx8M8%>G!?HH7$RQOiD;dIIWYXp`kf? z=FFMA#uyf@483EJ5pp|3s+2#x9Fe{mEpaSqGRO7wQ2qX%g|VF(85v^XO^qK4BfN@$ zQ7-@fzM{V-hWnxPEY=kvD9z^RR^qiN)-+T*6BZqU4vjmRo2A?;S7%`v?@w11&S!AG zys_`pYr&qu!2?QF43iWJ_`U2yN%!^(W*iX{BUw%c9p~wh3i&3JVnnj&GwB3qMcY3b zUY85rb$j%zn%dS~(p~}5%M05O+o8Sg3o`lGKrSD-#0{qw7RY`8_JBJ?fg`{*d&5NFPd7o`ic69Sii8&SYFd263LegeO&Mg9Gjv%js!F+kHBGs(5edCbp&*J&2I!Eof(MpHl06U@_ox)7tgU;>E-XhM8*%$ zCm`RFTA*;AgM$MUfg#GprAwDSkFGZB24wmD=TAAx*lqA6-2HU}$XPK$hz$0D*I;vY z@87=~^)HNyYi4#f{(R9DeC8U;_nEOC24Jm@Ra>Q+hI}VWgivO0Nu6YsHjTtTaBe1~ z1qj;c=m4zIKCo~pr(;6GyPP(v;owxM@~^xr9rL3#mZvf;L8C1@Kz&Q^2ecQamJtyV zPtIjM?r6N%^r^a9HU2m?$n66z$IY1g&o$lKBA8I6UIjGgKREZxq9UWvB2rE6S-n?{ z?V0coD$)Doa)TcPll$)5OUJr&-6JgyNnLNlEr$4O zMZ{JwvZ7lon=BMLGC9xO<6?Md9W6E_ETN`$;cn=J#)W~7=-9~cA4B;;Qr-hJzK=vaeIg?x z8%IlnPbI0v8N>fRf-}`s>ed%Qh1EPELoYj^Rm~OT4|Ma?YG?`#f-8(m5sP(}xeTb}HHs zjkL%1d-i+?G%_}R9O>+#%oV;48ccYS^yS}2$8`p8fBd%x7h867cOPhRv2b6BLLJA-S(T;uA`jp8LsNrh3aKC#7%A=3N0`c0nX;7IFSb zx*)=sI{o~jFwG>0wg~!5!{~-}RyR;*aT`t(-aaT-(uO(PQ$5!0g|cXJbqKl+BMm0J*+RCt(xork#$T9deK)bE-~ zxP*zR#7cQ`DYdu|%gqwJa48}eSh>#?5Ls1q8 z3%Yut>CXEg8NzsVH3fSYkY2YiGOpFt)s?iq@eN2Y%Xvssq4oRQYev@A(dk#fW2G?g 
zJO>%mvTgEJw24(y2CtG5x~@~KVOk{9%1p0%10V_^k!$F;*UaxdwjG5}&&i!e&0mX* zcq=e4Jdsu8(oy@$P<*Sl?uy05#+Tm9QmZ%aWK#k)a`4hXADD?pC+}=;eqkOQm1ox} zRhH738)zDGFEDW9&&G7_wT%)^Keiz9_E$tUb;f`WExo>W+i!GhO7*qTqSxR^ApDN; zgW_PhK>Qe3CU7-Qh(vHbG9KxSjPU>xFlNxol~y*}?sjcBY=L^lgp(SU$bx6P$e&Q} zd^JBm|9tW{Jn%`h8uY8grTd0fl7lqdO+y?tlPyp6UJ7z)W$-A~iu}pEdUXZXbt>z@ zl){-SSEPl7H5*&Je}8{#Eg7Hor6svGr5?e>qtupraR)>bKBZ-J+w4aagi`m314@z* z*2p}spI1V81mWok5^F}|Jp&-F)EPF<+gsV$GSMS(yQ_^Nv6h7$P^XKVUhT7)Rmu=~ z;cp_KZ1w!vQS_R}2BbJ7xajWcZr~4Nlt;IYjD^QepA23)oE)D+f8@`i!deA3icb8t zZ1Ao?>7^N&i!Z_aJ<;H*=qw**F7aM2Hl8OvP7E_+F#GQGQXVJ~9*g6BN`P@Emp4}5 zER>4$wt||$;ohx3D5>=zbQdL7+B*pCmI>BMXVobfDd;2F_wVn2RQ76YVnPOH8bd3q zbvV1{bahRzEG-#wZMTjx)(U_~`v9W#MOtlD)q14s$Rw5fg4C&6nGw`p>wJCC5|~|y z{kSP9et}i;%5@-H`9_v)+peL&SGD#G5A!f)ONoQ!r<2Q=L76m@=}1XVHeQ$+Bia{_ zx2^XO+y4FC=<0mX5Ohxp?2`>RBN5k^AJDl6y-zl!5-^T*F8+PXAd);Ls*k{V;@?Q= z2~e3^<>nr36~HQeUZB7jY+6X(&-u$=_=SZd0z4=b$Q7Yf(iRa+J9jGjiX7A{*qy4v zx?-(j+yr_d?(?tH3`%w(OEo?X8g4D|mbPk4yG=9-LA1hUs*8UihSt9=y^`nLWW~7U zOVmP#s81UR^WwfzC0!Gw?M{o1LQ!mmJs?lCxa&dPDR_}|TpF?>Yjvm+(Nt35R| zGP>7e)C@BD2D?(kEfu{LmMpXeUbF0k;~*X#tK6OBc{kRsryw=fYF?@YpC$ku6bPP) z&!G4{xHII&_^1+SV%bcc)ZrIi`{eBDVU@V-vyJ04XNQl27=-D4oib?aA7C){Hn+S@u1$ z3Uhb&$ly3V7***)meU_jgX;1QWy*f?hbkKfN7Xoz#(808XW%olSt1jF0fm=mHFg}X zoy=b5NY?Q=y``rDU}C}fkb;@+hUAr7;};8KfS#U!luD_Vi7haCf=a^1~N9>^fD-zNnlh_es>>2SLz+hqj z({BGCEV2PY39HLUr2rN z2N}ED&iBU;3klCTI>Jf>)<3Gtu~Y|{&f(59buY)ZeW$Ih?Pc(JjS}|*gMLH5UHDzP zd8W&c)zIAB4+3Cmdpip(1fYH1gYveAW~BYHF#4uqDfkZ@(&KQ~u`MDUH0Rko6wy~c z&m6py*m3a9^pP)LzXsJJ<1qp$9Kn2`pZ9#B2(}m93-=&a!)vB%O-s%JtkCf@XSQGy zj0_EZKzR^FGJ71ZpcW}Ri=gnCnagl#EI*T55Ka8LN9Rn_3bu%SruYu-<6hTp*ne3X)lo+ z&9fUic2#}0W8nZbq<}Q-7pqlORRKmzq7Krmg!^GLULajP$zwyRW_taBxNFz0b#M7f zU=6;_Y<>}tD&*BhxcuP5KrQIacq0z8Xe8UD8>4Dk-6cC>OP_n0y9VOy8r%uHSNXO&ImpOqA$1PFcW%~ z_LLxzm_Qyxq&jOoIkN(QqeKadiM~hrVjhDh$vC_CePVcJDsAo&Tv3@n^ZppEyC~d+i#1+!d8t_y)jkIp?)K*oL$84jrl4TO~+O zC!jvOpSVG4_@|Znfr4$&PoH^)};i+N0F+n?cm~~8Lh5WT2r$Lt@HBbjdRyOJvh7`+w%35 
zUXq&8cy9%RxJz1%bI{Ji>wqm9XN(ZSWDGus=fRhq!q(*f2zI=FOd zns=-R@kz+M`!2Q!XgY9-^S9E~E?8R!pofsZ>At#tB_|Qr)<58~c&72x7oz23utfe< z%do2`$8E?(8xPd43TI29m2rgBhnEZL;CUEHOJRRMH#fKYkigECja+A*YJlLQDW2_r zXdPaqB6D-YTclCJ>V{M;K5V|hfEcMCi0=f5wJ&YX=e0C9e@A*p%E~`I8V3)L*W4gS zQDDdIdqrvOW{))vWEOJ3X}W>z_3=UX2r=#G~-(NORAvgTf0x?nWLSHA3MjfZ@88}eNju*2`k$uI7U*xVD;`foSrlble04QiYr9@4)G zX)q$y$WhuH-n-^vPjT?M*GiU`m{37a>gh2PLk`LVX+O!%xeQ0uIP2)@64ND`gcT%q zQO5U*iRlbU6pEgOcLxHPG4vh~lSfeYeavfPiTa4n5XlOUasvG(RQwd?+P)2m&Knq+ zm`WiX;(r4zbu%S}gGj?foE1AIeHNVL^W5B%*t#>I#hiwlX#vn{a{_R3P$jO??vphH zJA{W~fOH-7`}d{yWo2*Qy;}*vmQ(-LA*(M>t|B6dL6&u@LJ=)45gCaPSYW>o*zpc1 z7Ocvf_wT8|WBva9+Xsddj0zKScf%On1U-)&!S>OLVO9v!x^0cKRBLwb(x$CVg`PEVy#BH4n3miBq%DO zkuMIeuH{f>_)%Mts##zv^#w#GbseYN5Xuae<3lE5hc^GjPm`D`>BFnmY>=hlC1`RS6 z`I*n?j+Iw2kG}&|{2Zp;mM>Byry?TCM#l<8cAe#-hi8!Oq2}7VYuE@~0x?v?W9GRr z%bZ4@wKj1;<7Y`?Auijo{Mg+zg(W(D%_fBqR{kr5&a?SK6Fe{I0v|L@TzZ%2tab7^pe z&)~omE!b|QUG$?~y;`wG0XYCr{$Ahub?ZXJrK5e7UhcU=&y2M;0Ah#Rq83HZ$KVwlt8Xn;Ix@#rlzKj2Qd|LNLpGN z!WWPC&J*_?oSnbw>O&|fM#IEa*haky_XW0r+u%-Vn79 zUBF|Mt8*_MlFkS3m@7nCFu?*b9J06@1>Rd?!!8L1pzzsd!N0FvPyO%g&AI}-jUgI= zkQ`V5EUNivj-NlGFo|9qmTX%12+rv3g7kk;oYssN>!&*l_lqAI$?nhbGSmk-k= ziE)9gx~hshPG@4U?vJSC;luRcV(SOLMI)b6uiyS1E}-UYv-Ox4p`WP68v)LJv0Mb% zcoaSrGTB0RP)|hHCM9|ZPIo|_-3r{gdDc?7`{!{=ufe6#g_KS4DJT#jK;~TGWqYao zfWM)W{qCRPpvNn1;pz=(0?9`zhtokj6O&b7@FhHE&UBtyMdA5RWBmh;A@=(GfS7(o zXa!Essy=_-LO33@6b@}Xl%w!-s>04d`X_W4s9h1x8s%kWgp4No?uHHPYBmHCqb}sj z@z;!TM8gBYjP6ZLu5aMe{RD=9GbrO>+*+Fe9c3?FdT!bRxrIoZ5Tmp;e*yIP50W_; zy#3g(cJblP5(B}?TDZwRgC`zt%jc78JK6%@;*$m2;~CHkAL-`X&cMt0K8I{STzP(> zq3Tk3|9c6BVdNk-YPNLrH~azubvntY5ex#xR3M7KBdrRWRT|<=v*VYi`hbJc8b+s; zqDkHEy*Q;6n-=THY}t7 z`FdD33Goj&$X7m;A|)=UeRS|SbEzi@7?3$HUE-kJ1&x}QA>OM|?u$eEpE zyP=z71wyI&;8tD{uLDq<|BDJy0x@onPS4S2%hc z$ciw%UJElG*7SdqzFE?D3BhB3^PM2xKRl5CGYYkG)gjAzDi|w{BbUN18k4$7)kgW$ zDfY}AygF=EFRZ?D;a4npLz(a>*A$aN7H~kdE-s;Ve@#QJta=U)&D#%GUp9auViqwGDek zy%q|r+l1VMfmi%NB0dH+L~z^f+iPLhARmWBwiPRX987ESNK+2SVkay?nAvO0(8pWo 
zys56<2*1Sh7cWA?0%3cDeEz#^hXFA>z~@1H3CzsQ_A%SB4iE+AH#(dYS?u!|j=C{sbuflg(J1vX z5HR(-AFJ0Hf)6ubscfc}x@Zo9;aB-aH3k%qI1q5i3Ttjjo22^yc!k8T< zDE!~j9Buuv9-fx)G)X7{Rc|%rPhuzsmQXnWBqJM}jZ92T8foWf(bCYaa#>hhG>c0Q zXj^F^-E=@zY%|7`%vu`}aVDV4NeQ}h=gwuRgIXEP=nt}aZ}9E#rrj}fZePRLbOd??urv*ExnnKhp;tu;gG@q?b9z!yQ8C$>@&<$zztKA? z!gm2nlc@$YqU&}X3;^nU46Qy&#C|VWg`0myJ1?e}qZvY(3{Aw>aaB!i`&+)Gw*5_s zPu@J!f1O7;x*!Dc!`Q^+g?=qQf^CYj1-5=CuAzYe4(>Jt(OXcm9yS<~!CaOaPl9GD zHSoQ{eIFl+wj_XYOUHJ&&p;_^>jN^@Kk!CVpS^(IBeW0oc83_q=Bm_5EwmM6SnYn^ z1dv7cpEGR+NHAptS&M}$EJxNMsa*#$9@J>8u0BNw_r{SkR_JcrucL@m)`J)+a{v(B zh6Ccp*3vL3`6QCv_ii6`Q?{crJCfZ`lIsv}qzZ&jWF__0y+MbZrQ(y~IAY zh3XjLYYZ=5OwDaa(boFbfe}Gs)cMpnmIu{XZ=~m&npoWFI4Q|02gQK|ge+G-X4RhX z*rCL+xuMC~Tcw_%Ct5ZaR#S?=L}Jp*%Qxr&K|r*joMIJ}q<8o=x{`8$qCKf9gLojnkRKg3Qv0L&Yf*H zH!$vH*tzpU%Xv&Rb-^K~)0u$$unxYy0h5c6A9RMcQXTuznEo(LS+le3Z#E1o5jp;M z_EvQTJ?3FxTUV+_lNdI{kvSEQn6~)(EPO^xRFo0L<@L-QKuLk)K+sHUOA8}7-lUGk z%=<$UMIb+VU?B0IZGvA(i3$GZUMzN|>Wd4fNXkwTCXilAN{VLV#U_{v8Cv&}*Xv01@FJ5GN5mQ$KzJ5W*42 z2df8NBnLZty!Y}FjP3L|y8&?eq&qJnarqDU;`kWC{&MA_Cgce#v#^G8}+;tiH;@{BS$qjw>Ly>xfla67)C#PpO zGmH4(Ux=$(EHeSFhRfLV(+@c@khI?0+j{}jN;d6-F#PUjGIK}T(ken6fT=pHyKkLY z5A0}|R0N1I8} z3nOJ$VzO6Ih|LTKM6sUO)kx{B?bKv$x@G{@F?f=f6Q6WiRzj31r8%pt!bf5m{OABN|rk^0?9ZlCHt{fcNO)zU^Hf`ee zbf5e|Tf8)Pky+yM793K)_;@3)BWo#it5zcX(MKmRH@&o|+Y19gRsG2D@aIA|NBB#+ z!T!LiybD}NERK(&kcWGgaoK0oEjZDYx6_2)z3YSYNb!MANZNa}3d5KT1?wMDh^Uf7 zT4T3F`yQ6w)Yb;!_=`_c5<61O@84ZA*RNmS&Bdh{_wbLZBzSUSM|L7D(*hd%kfgG* zvcF?5pQ`|L9nZ4f6=v~M<8agj`I5}=TLaYs{q8cSg$Nu1lhVJ!uZc@)Q; z*rmbzbt5>tvjKhxSA_omd4B$mhVUR>S6A0B>AE{WT_6jUqVCad-TK(^w+^6$ndOdy zdN5_La^h0?n{QZI|4(o6Bp~Js1?}2{&Ah6OSjNAQ*tQ*F&bv2wHNsXZP<-I%Kw5j; zY#+{qv-JU+z|Il`3aAv+5PJ+iD!~a@-V?2l9`!Nxn!uT=jQ!;VatFdVTg(JXA3*L6 z)84CK2lsJu%GWcFH>gM-sJSNT&@2BbR+5Pr^Cn4`1A3!-h*9VQMgHNaq{&g=$jBWL zJlc<+%hmJfzoNq|?Y+dO8eGU`j4-IuH2~R=xiR}7=iMWgePCHP!RuGA<9&sUxg$`i zA-4;hYA{M2+^xh1i|g5E8uya?{QP=I;MiO|_5vm~g0HF?P5usW9wlt^fTsw3 z1V->GRb!q$eM+WZc#5Fr%>3!}#(XtTKa-S)2%NAH2~(z82u93SudsKDvhVrOg`vUl 
zK5mUCTfn@b1aGFifxF`x=GNrL;dG{kJAQGiwXPr>6GWIbx7F~DpNn|R*_k_+EKEkAVH1S%$NVhcE{*jYGG$quakXjp;RTkuX0csTM5vgQS z{?NrnW#v0?O==l<-ax$Gu|)4&M*4Q7$J>^S!Rct~zXL*tj2Z;u4T$F*&a00(fiU&M z&eo~)_jGAox>ULPZ|A;}%Tq4vW>A;(sn$sJ$%k%YI8ih~kZXa4s;w`>(N3|A>S&gu$cssI zn#x1uK(qpYae3L>IG6nijA$i+eb6Bx%e{CG!T3Lz@@~&a```g)tuTHbq!$M1B z?CmEKL|KQd0PV9E#V|XGwV;qW7j)*@c}IG(Fq)Zin36;&1Kb>db~4hEDcoyK5zs+{ z;J;Ai-}^r_jvlDKAKPAYqY!0+uj2P7l4CHkvZo?cz~We3*iDPmtUg$*{Y+lP7=Xhd zZ2;Q{6vyld9xZrL?iSWD=NtH@B>bEDGds20r9UV8QnvxU>>w@5S-cP1I4 zrQ357bV^;5%p-J)^0_2!aYOLkIkF1D@Pf|fCEXKeusGWv}>ch(cFKeKpl z9T}p=V{)uFo%v$H6u-NNMo>E`B~$Kc&9^Tp+)1_1;0(s6n+>AVkpURZE(-dlc<+0>LUJ_9MOq0c8# z?qFas8*IH9>59f15RXT{O*k>qe)3iT5u|~gD(mAU+I}s5 z0kUT_rmTnm6fSMfeLmv@dV>$}E<5%-0C#N{S-hVjflr*VbZM`!vcuc!mzHL@S9B$Dh_2RUe@>lic z@~Z~hB+v`CxEeS(2$DRC?J`0X5nnMGOTz>hx+l~b^97bFp2-^NAz4Wm7IvvbKwaN-Y z#nX1FyqKWG(G9uyIm}Ve@5LH6B>)o;Mi2;JEU@Nx78KVC*#=6OxXeT=BDoF-O5pd& z1Q4#ce#SIb`HtEaPEVIymdyKs&G#5lt&nb6h`s=JO{f~Iia#>;V^}vub+o)-HIM4H zk6OAtJsBoeitB3FW9C5|)SUHf{mCgWxcWKe4GO{wo<9O5S`J&k#GmN2$%8717$ja1 zV$^JKAn6^++z&#y+0Kvglj0xC%Gd`s60vUYV+r#x=k0%q&&tgp-a?Y--@AoKN=SSL z{%zr(1~KZdzVe4II0tge!r+^8FD`5(Bc$Nn6vn-#zH0fiIG_sAtY06R=nSx`8&mio z{$2P=1)rSw>Gp5moHF=0(7_tPp)&Nz4U!T<^Z*3ZMfb9l2zK;aYnx(NHs?_&`W4=rgOi~;J<|39|Q2_|um zwjjTolOh^_sOT+ZnB6qK-SaW_i8od5U%R#n?|%aYwF^KFwsy4Un+g0R%^9*s`0z4L zsu9#NGI3X~X&wy12%_&^rU+>a$_bRHT|us3Utxi5hFWteXb2?2r@Dqk#kjBV##&To z4w-*_b9CujQ2n-f#uC0T3vt#pR8&6}nI8uqUAcSYTilrY3*=A#tJv$ds9zL8O2UA1_tf+$>Vtqz`W+nP0BIT|``b@zc50FGhxjxQ7g%ffp%WoYswL z#eFy{_a)?1aRb8A9Z;jHjgyY-)y_6u`{0n}V|CA+iu+Vdr+c4Z8iu%OVLefpL&VXE zyFyyuqa*|e4>0bFuc!414+|XhL@mibYCA?!P_6gEg{Lg;IhWJX6hlfjq4^;#n>`qX z#|n*bcoH}D`aWumw+fhiFod?1Z3Ypc41qP-NV}GTr4CIZY!uO$CniD@Ie_QU5?|4E zJ=WddzZ(5P#h92)>!H81I3DmTEhVnpN_hnB@1J3WD&Nyo$KIghqOUUdSOn@-sK>1g z*PzO_I5EL@9sacm9U=3^d;{>=_*U5v7f5^W-@pGTHV-<>^=3Beb z3`U#FQn+|UfLJ)y-5tdHWL##UCxTTDIfMoL>n`5r<^)p>7BvEEJMj_VOvFS8>pcDU4Ku#lG%R1{I9qaHkX2l2YG zu@Rl$Z#iA>4@a+ofPs%)q5hdx=E1=YDk_$2;QnS-kSQL?@C+oUVL^-P$y+zhS)5j zgpw&owrWs-rF 
z&-uc0*#-^ni*tuHGxYvij(pXLzlwBO#3l2p24;nd`mo|PhXhw&roatQ(zx(9pu?8` zhl2uOV637WO{#y)0I{-w!C)_q zO#Nz=W7oM0#FrfE@YvkcMP}G}w{t?=ypAD4t9F$ZT3iW396OLyc7SX+1F;(yyQoDE zL2wx$_K&pCD%{k=H`G)HZj(3)F2QUaiGjgzxH*eK=t2M+l#C%_-*|AuZWC1d zXwdB_wAC())L+5vlPk7}-~u-K7+yjZ<6(G$zq2a`_0BLK=rQrs#4Hatlm2&{JpbAH zH9?f4$QGnqAIpUVTRi_uT$>*Y9{L>Dg#Cxim9|74_4GhCxLPu~x3w_Wao{xhfsH#4 zk+}i_Ct9*l%p7wrHXRJ*ZOTC7W6_qE&hr^P$ys9G2XeayngwO6a$4a6xpoMxqEbBo z8@cb`U1ujN5_mZ(fb*`Gh9B?lIfuJ(*qZymco1Ta%zxo-0VJnA2M=z-Ps+*5%NL)n zhIQny(`W|^xS$;kNSV-t$4WO#KSHs*(#Z-%$h?Qx|J9=9A^v(y`YefBCs}oBq9>klqkf)q7;%N1CZ+ zIlypcJMaEpm~|u`Kkddv_#m~5QB|1Nu=R4l{z;~d0P_5+czJor4HXx3%0al2ix67W z>*0A4tKJbC`yX~c*&M>Lf$Ma7mO>ux>@zy>UwjHBFeI7A+TjxgcEA1hjhT$_Vz2=O z-GKwHoinD$11R#;qZQj(G8Dpsz1{@XQGQ z=l7s|UZk00ld^Mg=N=mEGd%87KhNqFT{+w^=jg~Bj1z^d7U3-K4cJnzu$CBsBPCBR zKXqSPczL>X9zF0*+;1>L!>h<-b0X4l4k2%P)d=(x8T5OD5=gG?0-vTHLfGSU_2e9z zw(U@gHS|DTN|F=<4`)bXXNk9g%ddJeqy(NNW^mfHFTc~PlrX5rjuLSnN1?0*#-$1c8ycT10$`dfz|P<|asw7=vG1?m$^|3ZI)AzYA?bp1hBh{V zI8-KxVQnv|dTyPZC)RU8!@G2$Yn4JD`aV3IR^S1>DWnO_HAW2$@*4k9cUf8G{<&V1 z>-&C{SRTgXb66QjOPevyeAhdW%Lgcp8YFw!%a?05Fg#Lc!4MoVtPyBLCWru2fX&L_ zptu_yy#XW#c@Z?cPxN040~GEZ@=>To{|4>i*riMS!nWUTVbk|wE)R$XsycC@zzKmF zN0?c4xf;n}d9<|mF5tg%oI+x6ZqZn;2wOKnlMCnyn{jOhCUmd;>*U+MeLEyy0Expe zVksdl=?iCi*dE4JCa%A}y(L8f_h@~4<-DHU?gxk9JNQR%snU}u2V5x(Y#++Y`d)5eCK$)=z>5k&M30c1kH^b3x+qMeXfVkPfTanj<-*Ztl zCFG+WY=a_R3nHYPqcC=(aDd;H!Tk#4^`N^x0uZFxu;J5a^3O7yW8w@W(_ENs``cN- zCSvcj4NF+?`gH)PV|>3{$N1NbG)u9>B$HH7@-A z@J$jIAbNIVC_y5)b#T}JN8Ou;W4*TTqaQP6mZ4-wDh;M6g$z+>B9+oW6p}{CJcTlr zA)3paMq?RDWJ*byT9L8LGH3SS+2^hGe&6ptj=lfg`}nQnSiNh#i|6?a_kCa2bzbLr zUXUgP1h9kH0YwQRsFM|PFhv}^pnEAbyl59#0&MsX6hfeLKrzHE1>L6OR#sQZ9`xFiJ&0Ppk%q zQI`rP@e!pvWMhlVr-yO!S)yswYKM%0w6toMpZ&{< zYHs`Ye2exb{iAFBnmA>E`91)HYI+m5xVj9-b|Q;~MCwo4Hr+C~Dy^L}2hkZN1?P6j zYG1TT<_nlt#Nv2f9#PHzF>{~dCQ?|x&+j!lb&wmrQoj}Me}A|^WXpPANqs)VYe~W#Qygu z{^!r>+zWpHzd0KJuM7M?feZN;4VeG)|J4V2@2$Tk%^)fP5x&*(CDu?Eu@V1T38&H@ z>k$edJ!58N^&E8!pR*b_>r(lW1IL#WQ%#HJUrL;zvGv4NP_$Oc#T%My=dl4K2ZIAK 
zgg$d_0Uw{00BLB==_qS9UY?>Y!qK!0HR0zQACMndfPQictQMd}9B5J!kN`G_L^?$% zkdcv#UUtBN^#-Y6QJLxig++sw5h75aRtzXQiQr`{*UETtOBSjM8`H-q-6Gbr^h^EO zx00O^ol-xXm+t{Y6DoIl`c^_tLpl_KeE_D6&i?)TJyphTCiPD3=#ihdrvBT5>q*GZ z{3i=2ixfy!Fl4M;O zh3i2Y_Orb`vSs(DTR6c{u$j#?GI#^&LMLhnyd-q4ctK)E?@a?QsX%mFPC~!BAVhz4 zb#=Q&(6L~mh!rc9nwt92SBMy{LHd0G*)DK1CNNvT@p5?VVgsO!GzEPmDGIFA;}BFJ z!2be8fT#f8IP~~0;SB-NOV(UUVZHMM=`Nvczdn8TWwn51_gouw#{93j_aMfK=XYXsA0 z0V4YA)`aZv;N3ZoJ%sCkHlY9j3KH2-^1{e&7)1!V7pRX(Xk=t$JZXjW7EG)lWL`vk z07o@!I%f2lFH8NE;O;%zJgVzq062|*ijI$ch9oDFAgcr40_RtuQ3CKTDH7q{!M{kPWqqf zvC+-QX0YxVG&S=XXDbVmPC>?js{m|L02UKOdIl98c=1Hd0{Rz!d>RcDO+#Bjd* zuP+bQv*M-BUHhM#aDg>I_-tr=q0jGwy!*85M0eG4;VZnw|PR9oZ{!tfvC_8eE7)02Q-A z@)sC%unCgeft&ywz3%;8SkmNrNtRD!1X)>nsD_7-mlK!Fnsma!Bjz`t0z#Ee9ILT! zlQxYeZp0G9RUrM1ltry8NR09E<_P7PXc55VBW-a8CRY8&Egzqt{}8cx18PN1#2ezD z2N?|*0R%XTHF=)VD8Kj^UdoIYs`2#lXpPAg@hF#Dk$zsobHTwxaz=cf1$ebYT5Ngu zUv>#$1c5unfFcmVCLe#LBJT zc(v+j-$55#wAO?Zg&3*3fh41HdSEMxxT^pN6P-G797s4Wy&-BrVUy^51B5PYhLlgv zU+m$`4Z{!~T<|YKMIiEMh3hBbtmoH_>B|{=KI@qFFV`nSb z@A(lyLYkI~MS+%s=i`p`c}_iWydV_-qOKnZ{H6T{AkUE_4ZB9H9X+C9#7~4kKv==%@Bx7rmA4<(hXu0r z!=F3aJ@0Ivc3Xoc@FcJt0(O&bn;9J!E{YDh4jhy~4^DJ1fU@P98g`M3(n5PjoGfHyak)(u#<2dM^n7vaa7SeWh!8l5OkvcmQ zqEBL!4PKuh)Mu#j8KETx{bF4W4Q+P13&?=8HoG8frX){JjMA|c(;9&-pZpAt^J?&L z_m`i5GZV_5;FR6FcVESNTl*dI+EtxOIJ(j1B3e?#FW8K`BY#X&=k%nkN8Ce!gW}#=r|1KNw$Z&=ct=RqUI-%nj)-B}tM!93&C_ z@Huq4McWdOb2z92cCmN*|11HeH{AV$?7DT#SSrJ>@87?FVd{^SY0l_iQs=uJUJn>} z5$L7BDzHB6h(fD14L!W&@C)U@lC^BcQHMpbKkQu=YJSUl@Xn;ix;i`$Mm)T9cV!E1 zZbeT|g&m|0d=01ZduY=Jeg0jWA}_sS1qULf(8LuONw#n50iE;vSPJ?#$l9XZJ<%X7 zsYTDE5c|mf?auj>enD=zXK7Nt^%Q_RLQEQ-u%GTFhtEO;=$nz3FJBhKGAZs23z=Xe z22-74>rVz8I)CzHf~6WzQF)3NRQ>quqcuW_cE^sLDoZ((wv~uUC@o~SvFpqa9lC>r zni+}fGBmw%Z+y>e2BFU&xd3Pt3sQkkTW$kDSaa(gJY*qfm+#yFTjdreq{P@DT!#nc z6TAl3@9=73(Lxr>566KOw%BOGNHEsRz6DMNdD^zzYkRz|Tj4ZX)f|o7=_-c`fbCo&kTWb!uoX#-fSXIxB$f0lAn#vKR6&*{8MT<-5Qi z3QO<+AXy$=OFI_$cu%66ZjbHPWj%uyrxNnzW2f>#uV~K;B-|+=(nylDPkn!JH+!%~ 
zVijIlA+R~YS_xPT5IFnKoV9yClJ2#j-Tk&m;pOu%BTdRti3ni^4(O`4Vmq=>g!StO z+&4fh4qXUA61MhiN?aI)9t}tS?D+i)u%}Kt3PRfjz@4#Z3b%5xB?i}&tG>?Q7ZZ~O zeM$!-E_~|J?$ms(1}e^>b-3#tzXz){yN{$hcze( zMB!ZQD6!dIw?Xb8rgbbI&M;21+y;wrzyCSkE~oi z`~cDZCeSw2yMeSJIC+tpuGk&>ciw?>o;CL^`ZnHv9uq&XQa_}j3-O|TBokPAi7be7 z*CfoZnGySA0ikZiLYHK>?Fs`0Ebub9ip&8tBNiZ4_Q5J4$<;4xT%W@)h2TGa0l0B2 zSg)4T zlj;~#^9wl}u>-;r8A6-X0?u}p{(XSnq^;uP;+WCDW$^wvFdzq3tWo1Cw3CI(Zp+Ja zIDFirRS1d0RW#<=!Rc890c^=Aya+51vWa6nz6~nRlU6n689#7Ws89tP;A63lJRM{f z0=`SqpCkRToFz<0$5r;bc7+gQcF$X+q=tP2yHA+o`F65ad{u?k9g(F`0orhRg9atJ9gfYy$p zYAZ5Z7Rm?eL^5;%HJvtbqD0SE8n_*CcxYwP!sSN&6CDvDH827G5G!S*>!~dT^4CHAC6GfQiG&>eZ&TSuRme6 zNAZFK{P5-Yl1+U4Wqvm|VpA~uL* zF%;tKSCHMEYa1^OGIDNU26Cekb>t|PG!XluUuJ^XZ9qwqV4S&b)+QGHvDWKPL!jvw zRrJ=kjbuIh$2H$3VbI$TiAJ!>jq2Wip5MNcguN0pn9|wv6jX$r)$o)k`FUgB_J;9~lgdQ}3#tsnSM zR!bCKRCwdR6jKAllP4Q7H7#GS*=O>pH@!C*=p)go1-VNUy1U1l@o$FKJjy5lLM*)`)Cp zA!<&3!qHp2LF*H$l#;K*ASINj<9HBcI=bG|S}DGFR)H$SFA4Sn@_m=>fg`YD)N`Aw z%e$7SZY40wVM zFW+pf2oUGFoA~#aWAe|ZDr*|xR71s}{(E8+>vJXthzX{_uv2v~&_{W7Ik^GKMO07} z1weU6mwi1c5dQh+z=O8kCjjMwqnamk8Q`P&s*(7!QtR; zO=mEQ6Zx`O=1Lm&1()PVg)Z=!r(DQ{#MHf~KgEKAU@TZn~^ zA{S@iag={>^w%VIqrl89fJQmbPQQN9hjbXH=@mfXMA#ZM26a0T<}gb(b-k(VyA3A% zPhcE}M+n9Zk$li+<&m4*A_^2Lf-m2vujDoix|{{bK~ko~B(G?+ie7A5^=zf#hkjM` zu+aWu19ZYEqM+6L1_?fwByCWIAU#!Gr=gowPzYF zR!K($V7T3vq?~cktP~JtOGc3nF;N6ILX)7Z3~8f&?nLZ)q9Oq06W?_a0MyGVM6V?0 zipLO6w9Y=n`#NdYWK|YffKP*No{0h|!9-vSQW}A}3eS+`=%^Dby^=roXlqa~5(%^A zBdl&>u7jRn5sDaxbN7=ciVzIur5SP!U}na}$HR~<1SiIUVb_goUD8@c?x8d!zI&#w z1cZc0gd*}?f<~H(LjgV9to6ap{R@puO@VZBP4Zi!1jSr6!gxbUaP%zO{~WPk8FTH3590)Mda8&{-CE4hp;j?34!rGOeNU>JgDfsrd9@N3nx96;;q)tne`EsBKDv_7OL3<;Mt?BqJvrvr>cyt$O? zK}N#?IfW(43P6VMk%!Iff?3}pt$^$@aJLNx-? 
z5iHz_TB`t*5Mqdd?xC5nNk#>V)rUPA(Gb*AJsP!GRv$pX05_QJ?@*`!T1jEy*7Dmw z@}7bhVmXc4igb^tN>`obYh7Nqj2uR{RIujQv1284yK~02pGV*yZ1ARY_6EQ_gz?vT zEhzArV`e<(^@6j)0PS`v9=o)gp?&6!6hzyo0k@M%69!3mazqM(WBWaNn#J7R033p9 zi(Fo!z{LysfbS+p8bx<{nUXVJ5z<+Y4puy52!~U)D2p{>je(oM7UpQKR}qi- zz(U?qzJ};xbTenPxn<}&~ zl=F2%-N^90o+>z&HICt|6OR8Kyl8dY26Hs}@r>n^H{jruHap2+t2u|*QNrDfuP3J4 zhHJZn;YLF6AsD8`5btYDts)=|G97D|*m0P1D`P|(R-r%I$%tT5KR$%waN#EK6uQnh zo>pnxB`lEN%ImK@Zrcps5R@3EouNR9=_xKeN^TVl?at9)k1@m52MRZ>Xb3{0R62R(#*g%XF@ju4c8f3sEB`U;q5s(a>KSOs- zWtuwpY#4%On!J2ggROdd`>Dq4w03Jquvm~n7ate-@{#jyzbMva$>?bO8fVNi`k}D` z!EYoe)6uT}#m+||4T&!Kz?22`%0J4$>pdxR9!K{WaG3IVRMQ)TgGCTw*$byg@{yugLj{4dLxJfzx4_0-KwK*Nc0{i zZ}FzTZFWcV6~41nx2O54c^MR5j{c)K@albWA)D-5!0!PRH!`*)A4La>@oST+=<1{V zE&zLh2=UR`iN&EM0tt>cy`hU5!vFX1d-(7YbBT$fSI1hI=uX$8YkJ*&*5rPuP5kns zDH$(tIYPMij0N-A^?5DG9xuM*EtPX?p@xdhY&w_bK}qhhH!7#zatxm&w2r zwcswj&y^EKk)}Bww2iP@n9q8OaF1(N4A1%6mrTa!3XydGL3 zS%3-@AzLd^zvs~mqu+<)E+~9Xis5)H!7f048wezQ8OC5@D-$f7oV3`G!7fS-D)o=; zK7Jayp75}9Hu=EwTYCnbw5QeQ&lHbj0!T}qon}n@dK4OH$d(u=aSiu7J<5#Jgo*-) z!xAMlQKh2~fo-(#;iEIPN8JFk{dVt<4egP>tcV$0NK%>5y`RKGAPu78(o&-EYu%7% z-5%`Yu)7_XM-)+5C%6?b9E5KtMgM8VL9W_AGFs#QyVvthX0Mxf?c{Xi+-!5!`Dk1t zMo`RcxD)V%nm0q~xC65@=i!?_ciJvK4}>59#Wxuj&yxm=^J#$7;+lA5a|T{j~vZ9Cx({II#25j z_HWIV`#;-?8mv+Io0;(Sn8T!eQ>!_`@9R!X)#RrGyNg#pFHB|UB=UJECW>_oO3N^vzxxAZ4<7wCeQ(@S>xRIx*Ucy?b**Axwgx?^yW^MtON` z_KsgB6bJl=@9p;j*jExRkgXXQU_W0QMV?r>awX~iQ2`@M*Ho3iJ%v~SkRq@Lm(LQm z(I%Q-n|BD>gwrn%D(&((7vBxjvf=UmBUHWrS;8BIn}7`LQ4ytc4xc~w&y&4OXddVy znWAD)D5>85r(6l({lLwBcj!tis6$?pT>))4d8^qAj0Li4WZnYO@C&u|Rt5 zE+vMU77h5>^21E|?n~7TG?q%w+;+!&Fj9hb90s5yK6tr`{;mbQ4}v$0dUMu9kpmgRN4>`vW_a-UBmg8MLTM`3 ze5-EcJ~ACS?r2Z*J2kX$8VKlm1TOi@xpiuv9au|_@XyM0^oZ_RHUX6`Le2j=xdF)< zg>*P}L#VY}aXC1;2g|$QvVP=|=RH<{uS;P&(c;Ue4A9gUJA(`z>i&_D^tD6TaJVJc zMIfI(=k}Ng3`%0ys8jW?R>O;nfkNH+&hjpN>=q+rsAlQujNHXM)N8HR6RmaHE$E!D!l^4l*;qZ84}rHE)hBS)8(k^b9F5t!|!N zL|rG|MCfI!zFx5f2;{|jDzUc?z4%@CH_A4Q{r-%n5!~M1zH5YJ51ey`ULHIYNsTB{ 
z@R*uu+B9~N1R@xR>3ckQKvm$??z(c1zinSaPG}~2)WC{WYI1Y)R!K$7OP)H)z^*oe zP8ar|@yJ6-iT3UE2JcsUI&f|fRw=1NF5iJ^7o*nURU;?@>4}Z@iFbQN|DT`9!Q$;hi7ex8EId66byZ_X1RLc)*sVKcWP^xc8A1lr8f zw6BegN0)BJ(G~?vL>Vv;L&EhC0xh%waq)Zj@K|P+UIraJdj*J!hNa|)bIi;z8xE`o zJvLJESN;45OZF5zO3{m0;UbT)d$FTTki>6LqSsP{Z`)dVgCe${U=L9GqK{zz{GIsF zIOigeH=;KJ@0o=JmRkIdoS2}8v4xF)+cjOa3Jf|XwARp9URK)x(wZ|6zh#KMq-{!{ zo6yAIoWRe-P!{M*&`5HGBWR5z&H(i%i4G843T(yj#vxaH0*XskWa{g{l9o6!FDxCk zt!b1XT_%L<*MK`LnY~{;DSYw$IbT(N@1M-qY+HXOKYKq@d0o}Jwr_xFPP%a{jY zV!q*m|KB7`mxVA@luN|>1iCgN z4BOlE_wB5W6ekGO%or@ah;$+X zM?AU+Xq!L_M5>CU6BsWC@*6;$@d!AFqi49;+20fOF!Cxw&jT3(ABS;1eIz?y#?FXJ zWd-+BYg=0mxXWO17(6-zvjjd+YNWRh+7S&2^id39#i#S@>AJ6^!Leqr`*33AwCyb5&Zg5d*5;yl6^+3O)f zF~L|$@%UE~_K~hRk?#a6SqD=AQ1S1@f{j@Xg*KQC=&S zG6fe$z{>lMQj(Gsj%d=CM3O^%ZZQ#b5g=47*a!!cq?Rt=)5IZ)@OfZ05Q794?N}@do33*l_?~V_;&%Y#RoObcI7OE^CI4qzMmo z0ZLqPke#o9ghNKK^omKIEzZD_=_-BPu8+(q6+ooJK;vRCV72iD5Yn+@A}%h-K(#^1 z@m76Z-EpLiBya&wA&v}0H1YmDg<(0cJ^BDNvLt*_I(bhWL61bAfUr{1&1aq9WXj=MKoz^Ai4^p0KOHOm7ZRRC`D#wKG@&M z2Dr!>-ZPt6h^}ThACtt<;UP8<79iSU@exN)@LdVf7>Wd6bj$-()>iNI+A9PD89;+J z)3@*4d*;7o`XV65pI&sD1Pblz;K~pmDiDDPX$w*uLLw#KfFuGYl05*o?KcxVk`5RJ$Vg&+h3he~L$Fw2?)+u{3X$6GjJ3GW%EJD8HqgJvUmyTs)e zET#{5-l5R=bz2>yv!GHbMQ>UWu20{fc_zCwAix_J2`~Q;>=3e&(c;_csS-tC@$Q?M zs1OKW8A#lAGzOi$BxjYm$%aPxb8+aWHc_r%=s6bgR_K8n8yhQ;C&Io+2o>JftW^`2 zo}p+YoEHRoe>BhFX21<3Q$bmo46($VdBb5>wRqyLfL1;6Cb2^KnbLzIXil@ z_1h{^gRi-AlZ}A6|Ld>tM;HH(XWbH7<=k4rGeMw5fz#!*kE|g4MN#M|U4h*U>h!3V z-OGlW|20DuRL^8bgz5e0#a-c4t)h7bem$BppRrb9?0m{OGP*4??5J> zP}Rvy4lwCRwnULbm=N`l9sS3b_+$sBgajk>eX7R3c+_?FE9anSW!JR{?)lHqkl@sp zLElOXaUR~egwOfy+c)^QhMU3O;5~K~pbczJaAi@^5tVE?0)`RN0T4$Fs!{9)fiqzN zIwNu44GBre+D%|%oUZQ0w@%~W4T4cK&K^Qv%fGLa;FRm6Mc&@13zSGKA(&x>+>cN^ z@iXAtYwQGdlPJZIA5Jcw=LE)uCga3NKh8{HoTLm9y5ody!#<`)1o#Bcfs2k-P$`OC&< zr;s>tO7{2k`K$q+gdf1Y`vLWYBG}^idz(GoBL)91zi1v-=|u_>8W?{8#T_QlDn=jC zN$|o^2*Mo;MdmXinCCv7zg=CHLXcb`xn6)%AGN@zTXdY4@R42vXw&G`sl8DQRhDl%4RK-3a`L1eO;^vi4m3I*l*bY32n=Dzv}?aHK@`mN?>y7m#vi 
z4xU=@C~HM;@bG7h8i9Q&;4hMqVCSi(-0t3Y0tc99Cjf@e|2}{zNeqG=USfI>D@Y5S zPkQuym8kB0VV;}z{;Mh6>q#aSlPV!^d}xS-)06Sygwu`QyACo8Fl(?KfOn|QvgvA1F~oXneB$EV6lR%N z8!>u(MrmqI_T(@dB3ts321~n`4ku1xEl7yr2&vbIoDCLV)7@u{pYDhMy4ImXb$aIWeN4y}4ADqHAFt|# zsgj7uOw0RltUFQ><2d6&Zb}{*pg<0sl)8Wl(JYYd3=Iq{0IEu)n8fTG2kT6C!`8C@ zUdnNDDeE;j(4`|Xdy+vT*z4lZ!Y+UC{PN6bjUmVh>oIxi(t%JpVpW56QuO`%#){f) ze_zIXL~xQ?5ugp2BYbm;kyBh~`@ltn0N|LigYk%>fugYE2o{glEhma~fSlyD5ym>8 z2~x8l7ldM7Dmp7f`M+1F@KJT)-U3(|5Fnrs#%S0QsPsNveh8ZnKF{0b~i*zt`ZX0Y{-Lct*te#?linfOvY5WC{B^4YNRz zn9?=J2h`M`t@mh~+>*e1kC|PTnVr$tr+QIhP}#G9MSN@5q?$Mgrv9opAS~$AAM}Dn zkioQsQGc(+!EL&{dlP)67#Sj{E7CF?5vuu`t%0jP?ONYazIl-LfOhFvPVxq;M;FHy z{(0c^H3o%tcGn{}vSk4zV87hh&&zXbJ{_*(IHazK3@c~=3 zsBQQ-?yEyX)3NW6v${BVFX?4DA1dZ2Y5g2_F4HIi{<#JzLRm5VF@QXP$Ucu+J#s$k2!rvsn zY7QXp(E52v7SIQV6bY!aGrtVhrbr-J_J`t4a@n%j zYPbD|f1w%o0b=e8wa1*IJkCM??mS!~JdE|;hQd43edXVE6uziY?84uGtoyYUuO#T= zeVq)fa|;kGv)gW{Ep^@=4tIwZ`-Lo&b9b!&5G?#G(Te5XvR2CO-MzHjJ};XjiGtvR zjiK>0?q#u;m3GYx*(#r@s;hR{Q@$y}wX(YOFSDPXt@d+(MCXrOX6DfcI%+d`65Dbb zuC<+VUBiD^y6sV|P`rhbU5mY3gwnaq8R08Jb-uRYips#|Dw}ii^wc0=0!p4=X{F*i zs7lDFPPXWWV{*Aeg*&3{fR>O0%$LpO%FUDEt|At#{)R`UC+BzZ{tW}%+4F(aUag5| zdDuGjFs;MGm+gp)c5CCM3bJCzGKFAP#R@)mlSD#z$J>ge9fASCnC7Jak_~rcKCSAAp?H?hr&&*f-!hRS*q&*%KVLde z;*n|R_I1t6uIQ36(iDjt9U2k7t(&P4h_KY zmx!4)`WEQ(?afK?0Mvb}{6=7)_TIgCH{05`uQ8L{dzKZ!QD%}R$+y>~Q4Cjpb#QFV zR6|p~XKOdp?MD_pC-{G88~c9XjPRXsSl)jYo=j-*sZu{9cFL`uczvm2i!h}CsKq|o zYq}>X4TJP{b`oEipbID)ZM5LsQ1-&GsrPhR!2NU-(+;Lf?zDo5H zW$Un|xjKxtRyYW(=x<|UC?GOL=qo{x%1>wxVDfq748n42JxF7rD4qF0cOeGdq*Q|; z28EwhGjr*PN{z0;G9cp^0f;hTbb&T(y9r=O8+a?6m=jkQ9W~PlNJ#_`@46 zl8tjxB2aj1po3JJ_6gQc*rNBQXF^u*(PYs>uu1CkszuX?U}30Zlk?|Xi4qPo8uNSf zP_z@x9i5g>FCtD*z$ghzVDhIg4>4ui`KTs;ZcN_!of&*gM2LSD*3gB4oG$yPT8kvb z{%;rZBfYm_)9j+9^XF0cmIO2Q71dY2u`XBieb6!N`pzsV+V3A$?xg_;y=8;7w*gW(qh)kt|ij&{6&)C=p;wDB455uU3 z=c8`KP>a+0+7~YFQstAyW%*=a+JP03EfNQ2Jt`&U=AK;BntQQh{MUf@6ZJXT z9^HgTwY4@3VVAfN!Jv2`Q-sQRjp$aJsxG zZeg1rqw!I0k9op3Th_hOJBM;H6V``M10Vaw}m>6 
ze*m3T5;&+oxaz4S?Pspqnq97WAsdsdV`*eo#7cqw$BVR%5m%ulT2-XWo3-RY{A+VdG! z^?kqK$XwnX#@+R}NQqhr{Yi8%2nb2N2XMQ_!vnj|xhL^e;lzma3F;&*GouOTbb#$-L@bf1w8W0R&m|2O%b^!o$Ct6i2wN>`=v zZ0z0>$0JQH44WI@!MaDSUzvEU5PR;4EyTVahb^%W!ko+I;DVTeM!c!FR$unllRGaNS9-=*MY7 zxr@V%5pqnzQ`@|GGg0#s?O#VZ0G{aR77P&ru1RcW@->fO)y2h00Sk{!P30x3B732$ zrTdmnSKL}8_^UQ#c|c&$_M`9aoeHZ1(yYs5p>8d0y+}yt@Pyww6Juk>l-l6dR!bzj zk@UQ`hx*R1(59CU$Csv_DZ78lM4mxRZWCAhE;Wai1cV3%EE-U5UbW1^p#*)=&zYzThtEoPu zrnH&di(NoXnEyWO7cVeP0!XO2qPT_G`611HM-TZGxzXJhUcL6|%g?~T?d&i>4gqCo zVf5YCunI&bhIY+${Qewmz>!7ZR0z8=xNcv*N0-S%6Z7 zx&TR*0M-+r?HfFODPro7uL(mEHtL_eV@#h+$Ys8ztIJfrI?Eqt`C|9S8;f<{w7OXd zY!2q)-@J2&Z|7oN-DeH2Towi`Sja0T5d8RM!!p&C->XaI*Q|U0%Oii2YWzj90hCoA z7h@x~uiL2aXy3Fm0)+q%waGbk^q)=G7>raxV&c@yTyCy9hNp1CBIxH8X5lu3SsQ~K zcAbQ^tGTR`B+^EQ1Nx+%YL7qy`j3HT2;xKg|J+v2z-gO3d$_5X`4vD|VQ4UtUPN_G z4Vpkxm`s9B-KMq>r9O|i1WxholJ>VEvD4l}RL3Z`4L+?sGM%j(zVhYYPw#Oz^7iM` zzlUShbo|gWGrydU^6j@Y*rX0Tt(OcxlB#X*^L!wT7+MqJ#yl^UqVVpof|V#i4ME#r zGJO{ZC=-Q&x~FS0q@#uu7_qdD`Yo%Diez;OcCL;pW}R}ATc`!TXVW+qb^Gqr(C-D*>FYq187zwQy0SPQlVYFg^vq zl#C7~#Er<>m)ZDG+yx|6oA{u92bM2r+|JWS~p8T?bN{_a>8RK!+r7TG80M=P8P!FAuKBD zD&mO+B1Lw4#7;85eUYFbK?`spPMzlgM~a6g^$`)8Xa1lskA@5_5B>JAuAU1bzwVaO zuRh+;cCmJM+w&QXHhL)ViRfq*4X9ee;6(((1~Jf7OE5Ub2%n`laTr}(4?*KKs97&rp7SNph=|BrPuuVjOhTkzN(quGP3^3 z-${XPs}bd*ucP;I0c=-t@)MNye*Ww+5}nG!bX$Kc81uyn{hKF&deJ-F#a{ zg@z7-Ie>LPIMIn9jk)-9FyVBH^*Wy4w10?9wHMIMvWqJ2w(d_nVzm zUAmp!ft9iDEW!3EBOJa2tArR^so9fumQ>DoK1FJSxxN5x570&gQOhVMbnTY@&%3LW zL`=NlFzCz0|FocOSSJ5M$Tzpgdah2qoE%Quk@`D)_;1)qV7$mSFUv>%0f!J&?(06> z5VPwSF!qi-D7hVHK6vELAdL;9-jGs#Gn1c(V1;CPGod9C^KfOIY71=qn#!4w-@_JW_l!Ibq|+g|P|CAJUHd>i)ja zR#!O!^k+Ys%x9Q=pKkNt#5jh7xttj zifxx!Bf~kRh;~&lYBvd$X6NN(24w!+>)%Jz_)WFTQV!;BQ}5Yz+82k5YV~WLjrs{f zS#nF4=cut?i!5TFL;3Zr?Z4sLI51mHx-wc?ZfVN9@xwDM^jQh=hKPGuN8WfeeTEGs zPzmzmi|95VoHj+6n`n090Ym=-$cSOL9$`@9=@^gFnhPDT54OdPu*N1lsq50dB1oTI znqi$A$kjskZn-fVfXlU0XIq+wM&# z6CN*9+{_`r?%~yy24BP;Dk^A{R8Q#^*!LDzJ!>kM&^+@#HpIdC^tbo6Vydmrs#K2m 
z?aqqZ&ef7KXgd7EOR@KQ?NHG~%daf+&{%lpk(};YXf|j3`P`R%F1&uvg| z;*kx*g>eO+#ZGJqQvYk0{xhWIW3)zY=}Tgb1adw_=9VMW18`qRkz$IB!9mwr90%X{ zR0aqw+2i$gRbUvC?(xI74{tnpamJnA@8mo*Kufd!$h&K{OCXAS`Apkfe%otXc{1{N z)5rs}Q&sCZ{CUKSC+zY|3Pwlm*+4)`C=O1G-+rS1_xZ{sFLMWtEi5QA$V<}fpX6&h zpD$KwZ(lj^YCQX7u~CAAeo<1e<_Efl>5YReo&rU35`5KkmArvG-elkgWCAs@m&n5) zB{@MEse~EV1OITg`2+_E?|5L!aO?r5~V+@U$~ zk>2EA`#3*wzQr8JkK#e|8?Wv1wnuw1Rpxx| zIaQguj5=PL`7+!6g9Wo4+{??oCOmRC_FF0MJaeNW?^s}>xm>B; z`TW384I1B@v zT?NtA|oS@7xt^1XqbMzW9fDUa?FL<^Ig+FcqZ(lsquW>t{x^CQnpa<}qscYfu9ca3PrnuA<@L$da0?IR zJleCWGO>4YN1h2RA0Mc7t4C+dX^csnYL-fC)V{L&z6u$_u75hpk4@ZzodkNC{JS%o zSb0Q!avP84s<&*U>W@66>>ga0Y|J(Tx@2TP;PC!EzaG(mikb>o)4Kag+6qNY=J zH6ln!pEZeQbQy{G{_{rFgTCsKS`j^OMzc;k9oIkADB|Tc%0azPH@p=|tAAzlh>@B( z@jJhAGE%jJd%-ugL=DzQgS2y&JiG4(bq03O^N(iyUOSWRAsTTwvyi^!&!L>Af=a&S z7l*@QKYspFqPI|3=8^~lLgzQh4&*?|3Cf42pPiAK)jwT%N85f#R+M?K$FEal8Z;T$ zN>dcF_L6&U(96eD6ry?WwMwRMd9UHQ6ZXDEdA`p*L<_9sybz|iC>fg))?n#OQ}@GYLAq- zH+f04H>iZ|S2cIeuyjftYU|K%$mhwYdor4(2_g(>IkY?kNW{)i@tl9xcR1EVH0!Z8h;YpTu^Z;xN6o59D-&d&^oM`Xtyfr&G z`l|Y7*vkfK+APy+d5+ynmcQA)Kk=>5N%61D)T=X1WnC4ocgW4o6okAtGwqOFJzL~y za51o@i2cJ>YAAovldnh_7$`2fv;5~E>xxHW)ouKX9cIVYG%hxlixg1`D0es&roaB! zlE_h-B+rU4UH)d_jh1hH+e$yrIl0y|P+FFjmlg>Nvbxv$_==arrM4P4h{K9R?YuSj9aj8>rLxZOYTdpXA0?O5 z&Oc92&uaOA_G2h2kyx~V4$PIKDhMS5gaHtjf=@OCVP@U!OT7Lgp}+3G?A@9FY1Dtn z2&VW-30X$$ojkK8SGUQ`y`v~2qeDNPNZkvF7BDxXr+Ws%KA<8Da<8rsIG|T*p7P?} zvW{ETx!qf>RX6BMhwtIqG3^m{zD28A-^IKRpwFPksAA@zfu8GEj+322mi-_4sWS8T z@^Wj|(2gpck3~ybc&NqyqDVnDjgfbfJJzJXp7L%noEeyKL(=xp;0@}p#_&uDe^u$avcg>INUGtF3;oZY&Bc)^2yk~}WRO$}A)Nia}^d;%0q?7 zsvBiGCbZ|wc1GyCvRG!n^&O$x^DR-aH6H)YK{Ni`l9%%&l~QcpN@=;es@{8bT6Fw~ zjI!M$c1m9@HUG@DDTTtoxawKmjN(RmaKY+4NSg%&LwSQmPRTg&c@6u7uJQ27DDK?! 
z@&wzAYu~|jrm0WrOXcO(c^qKQ9dfbp%b<<3Quu$_F##yb)3YF#XGeaD`QREG5hwF* zm+yD6OY8EiM7gJ2?)!uMMNw!X2voYAG;yfdG9f81&tvoOvXJY~h!3ai$D_*9A~LBwGg9Pp-oP_zG44Mma6JEubaShk47T-O%h>s=lK z3=}<^K3JD``AAO6Lv+S8_{*VIb;_X8LZg8hkuE5^<6*K#2Jl4 z7g@viB;+wxr8gDmEgUL%OJ^{e9_Q@zWfT~6qo^U0arHN3M$&RXs<7+`s$lZW^I&@r zyl3^XR4?;2k9XyXUS*lG^5>g-+qcg>y*<9VsetPQlWQ(^gW}0LE8t%rj%79NKR18k zBA2yy(y9;t&vQLH|6J#>(8Ot#%vCFT@-8?O0>lgjA+_xaoA`Uwp#j*l#|P!#ruFz= z&-SQ(=-hF&C(5(4tT(yfvBXBp&b29R@;1@IN)Io-|Oz)0^jy9EMyfrvFaP(n$}2 z@=pBm#h1D75)3#;NtJmH}*C5lu)jatY#6u;kAB{yYn5Lp*~ZM%gu4H@G+O>&v6c?wkX?* z_@C{A9Rr6i?0);T?v0C5vwOc4=kJDvHsh*nvJr<{hhlx}GA}EU^x*w#O=jhmw&9;G zO4hA^cFGr)-#u?xa(iv%Yd1sqqO`qz-6BbDKW|3$-o}M9wJ(#*XH-0=TvnNQ4ljP= zq$+4LF%S$O&2pkb4V@1m92aYiJ>Rv;wi?(Ni|!xJ-x=^M-cI67rH8uN$mdt9Er~dd z0thxi+k$+QaG|LfNdBJe^;Jn#K(mzqvrnEpsUO_p(9H|;?i*m_EZ?xfMB^!o~Jc`O|X|Z6|L8I3C-SkQgvscXdeT`RJ3?FC}`EBpMXkyrC-|$kz;g`9) zM(94v)3<58aE373Xm#k8xA;A8L}lp4lQ7-p0=meF;`@tEpKdM;E}HL+sQI^QfOw$_lko~M(-E= z`ugO+^33V;f8qiEvpFWlrN&T-X=xffEBxXU5}0XD{UvA!D~?}NJoPR2qJHpkGlwek zCv5xZ0d+oUEx2M_}|6=+wI+wTnu8%Q2u;q7S%w6s+f3$oKXR8@~ z4$sx(*(k!sQtq@}L1KHVkATvf%V<%RW-Xz6tSY)GaenV_MgwWV^H|%^ZmK_!xu>7PFzX%%kzq#u!TOq*uS5Z?mK&MQ=oF^<05v}HY1ycL)f;FA8IyZ}@cSi5C-o9kzQ6-VZvX7Kem*I@D+E*j-cF3m_Lw?JzRlYTwHbhiuipPw zE3TST>e$o9pEKtjSJBLut4(Skn#}a zozVwz6}#?daR$DuKea_?6?*@RC>4qA3PVPWn+7ltf3HyiBmz0xSvEU{64PsBHT zuLAjA?TLBhJK~As72PPb6^!3$uNGeGq8&N2_no%yfJbV9=CBsy-VF{KA;-tAFo^Gv zT&17Geu4#WWR<>ccoFPYz_X2lA1p5n}{a- zlLe|0+bP~~CMJ}YjuG~JA9m!5=s4ol%47vh_;W=(l-?*Zz`0Rh*RgtRo-QzzR0H@h z(812AtYn9|(#MB&F^!9#fjS|j>7W@xNm{b8I?G(?i)mnqcxK)09P7swk-)0+Zd|*V=2XQ?;eigKx00IE!5QsM#b}$ZHU9=W8ryV)8(Hpoq1j!e}v%H1Cw;F9k;L z0CAC@vCKizXoXMSkL|t6zp>k_dI@Kj(C}O;mg!Tj!y5nqho2)KW%>pyB4X1!t#wvhz3i0!Xw$$>KNWG8VoA%U+{PLRS2M z^lR=wCTt1te?0Ur6bCL0G8oa9?OwKyBw?8WW z?cs7LYRL>+&i%jrm+vtN3**|k<@vK8F^+u2-0&2zsxhMieSHxrC#V!y9x8$<=ck ziz-)C0^{x0FcuAA#!Gr;Jgczv3AzQ9f4+?e1ilO`^l@KWm$7!e+qHcS%T-qjHAnFG zwPY;R<9>WZ)wlhoBM=f5W6?-^?a1~MzOLmL7KYaLj;YQ{HxgCX|3gFGd&vf8GwF!+ 
z3q56NE&M#obxS|Qob9S9%QJ!K#MKCV2ppP;Z0wen>mEv!hakCGI=Mn{)s8|M($O%LDSE#6v#f{ufhEel``;!ejvTP9+~qIMG?lUDhyPoq z*GoUGT`*;Hd2w!L%V%kh(aEy2YmXBEyium5a5 zc4mxoTt;U;&i+rdEtrq9Po8aS5>@W5M!m%}_cZFL;cpAS$7es&<7CG6a3p97%7^J% zUW+_<{*Pc>y1}{17_$7f#v1!ttBx122FmchL4V}-F{d2?*Ix$WiZ?k+My2p3dSkoq z6I+cV%cU;HOi5oHZ9s`<(*uXj7|G}A`0Re+8C6tv;(M;^iii}c^gsV-0f4GDuf;G~ z6w8JG95lbKHjur%d*9?;mi&58qo0|>S-_Md%D4R%-A(mKqcqM7fuij?W~lxA`^yW5-;Pgyj>(U4s}Q&Hl6ZZ3CMRTa`uPEo_1&#&qI;Qm zGy~WqTx3G>8;q|su3q*{Ob1N-J zN>te{QcO0T>>so6-T!3#=H_^|;nc?0gJ1BUYGU=8mubtI404+!c1F?c1kJad>{#pb zqFKz(-k*6b_kxh@Nk;b!S3xt^{@q7Cnna?NH12BhsI<5X+*2_u=23Hu`f>=M*HOn$ zG4?GIlD_Szbp<5E~C3|>aYZ*$Q$90F^y{sSn=e4%4oa`TNjzvOEX1`$GDnfc+q}{XF=D`(kZp4A1 z6+*GJcmJZsxcC1Tdv6|Az@>}T)idG_<&$M5%jkM}s<(n|RcH(f~N zyc5`&eVrF2`Pj?4JYG?$6Zg*HRkUpKKRE9&;f#W;x1k{$8-ZZphjfJbjeAL_`Ul7# zJnKi-2flwKTgtKc6<^#q->twJ!Gm*NG6VSNmA$X_;a<{@6h3=LAoE(uT?q}lXQ76) zu__YIkhvBk(j>eqzOvUu_Q9SV^s;o(RADLA81Ijv5=-Zrs&f#v+?o$Sv93X$X@JQT zLqHOG3OCgVVQ$Sa`<@xqpnF0E-npYeW4^#mIaXnP!s}MG{lc~BKSl4o?3o>G_wBGf zny1^+?Q0?f&1Eu%>vwvlon3i-;>TUgy?La>lBUr9TY;mQc<@lOVozQ+N5jRDcW2_^g|Jj;B{7sF5Qb`heXxKH<)&o2oki zXj&!yVMma|k1ssnFe$ZweazGqO$PN`REv4+->kDAEpvMRV5f0M#ZSrL3!`sEovN=h zFGbaO%NI9fA~*1KJbb!ZpWe8@ zoqdtyrA%sB4s>bt>%Z^G8Sy`EWaK}RhPF5DLf_eevV#rJKkQb*W<(}S_P3R>#-;Vn zKBc_aSg)Ckt)l`i5TS%kSQ8H7DTB#w{NLdWMR%%x;ZTczv}d|JqZ)eE*l` zz|Eez1!P!^O<&1b z`68ax(7O9mdt5kZ`^v$WgCWunF9Ge{-T&~Nd{IT*!R+>d;n9kKo5z?a!*g0#UIk~*tmZVk zDM<}<5?{1&Hz0R8(?b(V30}EMvqao?bL+7YBru+~(a78G3GIvI72(X^SX0%lSU4&j zxR-mrd{8+!*trR~P2uNbcnt9u-uVOA35a_sIw)y!;1M&Wzt!nXNwKVk^)a5a)jw5L zPk2I=Vn6u3B3`iA6W>}~=BUuD<8J#a`%Pq}cOIdKWiZyWm2ZXaX36r<02?9NKJ9yB zC*h!^I1?#gWituB04`dG&5lex!FrarhyDQO$TxS4gWWl*FL0Q=)YS^_F#MbMDurG3057IGbk65`*@{}EB?L#1T)w}%ml}JaOd(E-bZ^6P_MQeHv$VTXG zT}mOdUqu~G55&Zo@7VQyhahr)&a{WQglBDC(8wJc?`*vzw7b=QUXDn|zMV0YzU|I&Z0u zPq3SOs?)UY+)mxX&(YTTt2U>b42A|zm~EF_YK5uLFe5|r#J97aR=-Cl3I2M z9^Gxi*WJ*kj;U`&+^R7FVGVJqrX@jMfN$8z`@GuH7wp zeqMI5Vz7r`qM1_2I8(`>SEAw2H?PFgL*^q=#|`=4d`TY>78Ml@Z8jUdlaZ9PQvdQi 
zsO7rtbXvb2QdZf1i=StusJMfK@{N1HnSVdONpqM-!Q#03IqdGS%j-y&kfKA{uBB+U zj42R-?)j`x9y&sZ2l9YOND?Wn4jw(f5Q6%d6q|-?n=nfua7T@ny1KJTTsjw|JGq#8 z)04kCjz~;yOB&C?U71PALl)Ygqyx2|rMvl}OTw`@u+Azq7suZBbvz@a*wZ+NzFaNo z!0P0kwU_7FFPGTSRC$#g%-m>wu0b}o!;UC{U zVD#G&f?(vY?_ZqOT8!X5Xs1_>VBoqUm}uA|O5pu^T8sonh5#5wNjKp3+s56u-=5E! zsv3XFy8ErK7lo3XyK`+N_d>4P?u=t4k4z3iUwiuNJRJ092jVW_3%;Dj^#0lBFVUc{ zK8>0SWp2J*((FZ_UA9V8Y>VD_7sWX``ozo2^I~k5Y!#B6(={?f`pXxJv()K5t7X^e zj`k^hpDboX9pri%C27SCVa!jj`mu_P8@1S@{ZH|LINX(8L!`AFn&TDN%L4sekA_aw zcMa)-)!EPhTw%}=XW>-Qh3Bs+V-PQ5E-4yN#~Hw>O`;${o7N84K7m%N)J!yW=$Pow zkH?>%xZ@)D=@H2*eDcjpi3QJ22|{|?V29Dl`mwtS_pO4Z# z;$Ac6=+%=@alXxuA*fQ;v}|3yN^WC}juaH|9px$Pl#(a=?u)+sESNfH+3Qc9D!U2~ zEv~#({jaHIA+)DYX!1th!Jb-!xjFti%$ULPM>X}!o@DseF^_ZU zyH3}mB8L)W52c#RI+rRM^Ktih%x3kj{H7`LLN&{6pr#J5W78adq&kE_rUIc&X%G(K zgEt)?9h*26k|+^@0)gUuyJBFpnzs%xDb$spAJ6|du_?zBJ$}b* z&ZYEOyeC`B2O4(UBSMZip^g@9mak1b30=fIO1zm_OihX6cc>q@f(yja$JT^4bnoomOKol4sc94xipj(t^gFjWuX z6`9ZX(U{t5jHZB41Ql?~S*>t^TR<2e3QX`e4UMM6dXgkTA!E;8D126ZdQQ*yvbr_P zh{n#YTD(5*fNGkBmE50j2Jd9u9{;U9t|ZLBSY(6v*!ib>CeAU=ke3}9Ng`1wi+%b!AFnbR>iGB)zVzjI?i;!OLCCc!<~Zr5<4&|%dHgV@E8g&H2dbE0}r z@TmNHv$~d7Va&d?tQo#-Myg5bSsoIdfWRaP(7HghY!NV<7BKC=?=v5KR2OrRLno!QqN`dtP2Kkd|)wl+MSw$ArHG zHPGh!PJ)zI-0u|oMo!Tu9JN!t)mqCRRBF*aUU@v8zI+cvf=-naLKNMC!iIdE{qsb% zI{oG{fL-;@n$;S&_{6h#Do4?gd9XjFK?zKK_Q6YYL>`!3cY$Ov@x2ihXi3yg?#i>- z>?f+zqzR`T9bKwO|6X&Zn{L3=Gs?2&+WwZ zaO+H`4AsAUH`Y1F-)UHaQX8l+Hhp+xQ}IeM(VU%Y)0rtl?S+Y>7q%nb@~PAEYG2(%3$I9>4zEE6ig*bt?7bi ze3jkmi$p8EjeVk7<$yrn*yewc3mX1>w3Ue=7kqjr$4+>7wao@+|Rd^@m_gh={ zUWN!if-S?=uO#$B&gHub94#Z~O{So_nBzn=4U^Q~AD>D64PWy$gn;-tbVADg2Fv(8 z7C4-f70nG8`>;eF)N5-i@*-oc9`{VxslWq4GC^vjx3q>t!J_Snlpzd8eUV?4**3N! 
z#zDO7ZGmcbgTgNpO~dAnrL7hc|bojOc_EKS+w6DScT|U0d@?bPFOp1FmF+ z3-`ALT%z;x-nOhczH59WyUB{hEM0=>Mpy6^8IY${_Z}hE=kS-$pX}3TFBMOo=`cKy zTW>h=aD%~zZ-q9!Z|}#SZs)S>_{bX)r|pUz=;qky=Z6_tQQXI_JS%F$to!P%mX>S* zA}>~^p1ZXFs>f`fJ3iOL!?Q$w1EpJYOv>LV_qsr4f9?u{@sYk* z`?f~!c?Y%WtGj})+Op3VbEs_ivb`s>kE8B$#AS8GusacwN6OZYZ7dkF-n6=J?xKw+ z-DCwFXZOtdG(LEHpJdVZ()?21M$af&r-Y6tAg)Y{mL|HJmdHhzJUilFwA9$;YeWqD zqHl5*cgv~lDbLH}j**Zu*EHE$eohzj_J>!N<9ljK2hbHkCbbzvmLtGVs*-xP1b4Mi__It`_?XY`T34J;_=C_x#F^&exda* z4@Po97qzwS6ArF5rrzwnFwvX{p0^TALq7@|^zgzfR zId(>bYPZcPKO!$Yy<2{v` z+3U1;>y<{&A(xz_zPkHj4!bO_gMAd1&K|N(+JnXccL zK%?mMJ-4N{{?>$@8-kj54g8#@`UCu&mG@F)F&-H^?sfDtj5-Di3U z-o5j7D~?&hb}LWn0`xWwgJ+l$z0zl}esgdJn4njdldw`;EUT{SB*gWv?@HFZ_WW!V zy1Oc6=;v8cWCW}pvp-+T5m{HpJ#p=3$fGS4<4iz>Y1n03svpTdPXD&(dDqz^QPAS? za~2R&KY`2JrQWtU8gXb_m8}fC@Z+%Iu{hAn9Zr|eU1p}=wB8YIbD8<$$}!``9S=WP z+7K|=8(^~IyycYyOm=i#spU6GBm}=tbnx6$*o4Fgh0$o1HSjF@Qeu^EQee3A@=*nhf zw3A`o&rg1xBVW|v`tEVkY@#}$%;i2hex2!O2~YE;hmV%L%vkwZVeZ<1O3tZbR+)x^~zEwf-`Fb4QD27 zFXP)FXZ!Q8{x7~pKsA2Wt=VmF#b$OKkouNA6P`zft|N0Fs}^1n|DkMjST{3_*@=bI zxt1$aM(o0aQ)w#pXEoD#vbhXv*VI3}e~NC@u@|`hN%jC^7mF>bH~=kMd{{U|?Y`YH zJ`0mc16Fg6ftS1+QiQ{x2qkfc>m?;uJ2*j1Leg{KrV)bcJN8Q@f3zWZZ3T|mE~~w> zXId3T+X#h)x6NTY6c)n^n4G$rLnPXE-aT09w_IagcA!YLCNZ}(ciH6b8o(>R!=yyc zKr>_xQPjJXMqt+bBCf>?zlKUE-I_SSZf*TaF@bYGtvP!pMY#9VZ2J-F(;MI*57eK2 z_;$C(BDN+5&-(PnYif27@TfVnW<@2=mA8oOw+cY`;TA-y6D?ch{rd-p9UC6cG7dC% zbJ*i+x&$Po5J<=!95Y{kQr4q6wq{;b?cf!bqvwT!JPz$YeePbNmK%69Z#*M#_2s>Z zg-^~==kaM8Q-L7)$ts=4w`;uId3hd1x#Jb#J^3wDw`i2H@vrrZrxC{7ZDG047uqTM zfdZ7$CyzM{ETn`oMkyRgI8}!Z8KW15>Mq~Ra2g-|NpXfy`M~fZ>>%{%Rz+ZxeYYk8 zBmuM^>zJ^r^)h?RMtbFC_WY2r_27$pX;~ z^cG?hUk>B}n9%H8W^z--x}Q-jaKJLaq~di};7zVjkvaSV73eM7g&+=qlpE zZaO%^)`WxsDgvI(fl5^LZQ63AbYR`3iy$1VQA9~}Z=Kb8ojqm~z0#FEMmBp5;pnHs zlz)}gL)cN5K!u!^FSpd2xD}D6Zba2|orYw&kcuzV0W^&-7(=}dIRzUqD*#oM1ahyh zK|?c>)K*8Wt6Y&oL_#Yt{6K8v4ooY0Fs$X}=Wm5;iA26>TfAMyh(Nn~bshBaknYM3 zZc^FtVQk%{On0SOC+(arer`_y#N^e9CRjr}$9n|0va$F75l24z4U9V;#4<{9G=UNl 
zlb}9cDUu`Inbxp$D*;*1XbM1+#DZv@OPYi|Y58w>EI6d1JJ~E)q@u^!65*r_S5%etRM%8bnTvO! z)D>7o2BSTrR^g5M#+!onplbUmy3Z@>szaFik~x2m+(r0?F(;g9#*7&NiQXVgqq}tH z+Aus+M<+~^dO~t6dwYAm;hID2UKk>ur4;N+-Je7DBFnK%Irnvzmd2$qJFB|9Ov|0d z-u9*Fw<5JYm;5Xbt&mhzkc!S_d@@%Xx!|W0N42#(Y9-AD-5Cvs@qll)LyuLZwrm=wGRA&|bH(CsHWb zCCxE%HE^@}bZCNc2-j_AR=C>BdbZ^j6V7gB&0QM;o~ya6h;b|D#YBJ;?_VqoH_%r< zr0zVX*rnp=m=RSRmBI`AbKc1J%}>3Khm~k~)*s27SbtoH`Np_r+7`|SQxutnn%P|E zawq1aa0U0exkCYPQwwP*a;mF|El+&+l3dKDsXZvN`OKe8D$U7y2&pcU`&rVq+DsfDk{eIICm1UV@n;rv%)N@owtLC(k+c60Rd;L1+(D3MJ;Y#-(p-BhW7ku)V{d3(V+T~?y z_dhbXI5&B#$q62W6TDEh#_kn4!4F*!qD4^)Jq4n0=458uu4gn%0l$5>dzMEnl(ZZ& z+b^K1zH=SphWfyBj1L(R8Y)z}r|{QX<(p-ux#6loTWIvRRn9ABZ#m(id(=No zGofvch-%Wz`9=7UULF#tZjQh2Ply?J?N%LLQmNQ> z06^>YZ;!NUTYpNHB&kU53^9JgyQeVZ;_@|*w3yrD8>3?6d&aaGw=MhgRN+%tJ42l} zR>nK@2;FG1M$Tl~y58kOL_T}K#{BDe1uwYGnRt}XH0#u?fe@BKs*l7urGdCSs&k+c z?`^BkH}mq#FK=^yBvBW|Q4)GMJgRt^hNR4f*#P%W|aZ1)z?fW%!n&(3j$ znfIx4l?lAGzRhMeFZP}9t(XI47n#xUWyyO}qMan%(@rg_Q@;7mH6D6-SGXqkR)NS- zmnYL}cHZq+j^F`L^(VBacRx~XCr+<6&32z#ef+_5;D)Cq}m9DW3qqSyTh`^XVrFc#?zXO zy;H%97@M_WeR*x~&JTqQmg3JJOF9Uy>y)2AOO(xo5g<{ybSpz>*QK(BXr zOY@9)TE6h@N_X}cNjmw-sPbZD=~s@IUXa!@ckA4FMEp{fng-5%0tr^Bp`I+pWuSy* zT!s~waaGHjUM^%fZ&zm)wd^kKA3u@B)T?@^D8N|tP^rbd4)IcarC(w^w5zBcdM&@x z*;B}8aix_`UE~$D428+>KSOBBPgM_-iyS5wnZrpGeSbL1eIgAYk`F=^xKf#u8`t#E z^}WU(qewpva<5qE3pu^7sLomS?#oq43fOn?ONKWp@UoL~aL~IV^v-qs#q3)<*pFOe zm+@lZys}m3!ZcIwr!$w|Awpo)<>cV^b!Tsr#7*&j8aS{qTgXtY-GurUICUQz{7w3v za-_Z>PVz8~N13X+bduZ^E|jb{%@mqo&DYc~LWpVjGiZE8Q^-)7FOO~2&WcG7W4wVI z5(y->2|QpAXsu-7i`0Ek9!dip5o(4?e*yC<)>_@O^LB~q(M`Pr-;hhfMVsTf1zJ%H z-~4x9R13YIMs6$?;?-<{T#1vf@eNPR)l=+?NC1FFsOIUCP?!ngN%cP4+i%9%l~B@5 zs(SUw1z}I?^V|Ymro~lqts-(QBX72te$eYD*?ZS$<_YfSV;g7Ij+nBV|q^X}#3Rs(bX-b9^@1Aa&_Q=yajO z$zM}Hrbj=%!kyhsmdIj*G?rC@098l2?r&^Dbwj>-F3WRvhA1)^#a>BsKy&%J@W>i$x$ z$tCH9Pe6x)7EJq3vLq`nVW|ljlRbu(@B8U9n=k(79JaS)If?yKxvm`JlQQ{fv`y8$ zctyDRJ|f%e4Rzp79T(Qd;}wR|?Ivz_{Fa@5!yBpi7N`Qe@!ST-eJUC6m5}RJ)CuI_ zaqbn0TW{ZvcZ$0w~z&$bvrVa0l 
zg0?}`)Z2QskBTjw@)~Is;f-FWutt#&B@!N^dYHm;)k9Z@k>ALIQc9RyP}PrIM)m$A!n;xsDdw#7 zMrED!wW;lg$zi{h3f4PPYYZUe4;{vm)86@OE$xpkhp!-VLmqTtYfVeO@G8q}nAyEX zlkA_YwP(%e?b@wJkHV5v!IGR5G41v#hxCreRQ)u05>dStpC0pSm-^1u{7yuZiyEtT zPqh0Eyq#!tS^^y80cp9|)a-_xT zB%ejk`}yQWWO1t)a)@7xbFPn=2Dyj^W0Ro2CKu_%MgCb}AT-3LA)jgja)>ThAD6{qt$;Q z3j3|t&rTf>&%2h^x;?rJkI(UR8;=rG#?4qfh@5OSv%R7Y>W(~@I1x6 zrEYxKe8exQ;e`jk@au$<9aBxH$)>ui&cLm@Sb&r6(PsMbvngbqlWn8X2Sou{kF))+ z;`qe2F}!?u3*q1{A|bLC8B33QM-(PT&tV=AJF3$x(7aEOx+HciV=>_?G&L8H#5$;& zzVt<^oz&oZl}W0AtUp!(*LW|&NYp5D^B+6S`sfm@JrSuq4++hMW#|wPk{7wPPlh%7ov#*4}cfVuT|Dbc}|LHqSc7RNkbq}1$4#36OrB-2wWWI{y zZ{Htwr{n63ozBGDENts|bcN8XMD-=s>d7=ZZbfGAYXMc8{`cFTzyzCy$doJY?mZ(8 zFDJ%jk(oG+g1k4)s5rh5trW|X$Aed%r2Vf}14mBNB}TQGh&bNP0!sUhfzn!BB8%$M8@Qy-kRtpxDZ<}O z72)oZs%ulQ*_~PDYm=rE_qlb2UKG3;f#-^(%Tuk_k{J>kPJNt-krE{ElcWog(GQh_ zYcWER@B@$-5ywi1*B1ocl0-6ylK2==dZT}g(nF$VTgiA%o`r{(2EHix;bP^=E`u=o_i3-mw6Aq`%wT}ejk*8k{I(SIm) zTgvQCleGY-scCP|i(Oc3@rjTM7Z&4H?fV?jiC4@(_n><85>7}DrE~m;4#NN zK{8Pj>lZ6JRtBt4JK`NrDZ-aut2ng4;`(u|AGawbq2@}JJ;iTNe2jhaNmBJRCr-&0 zNT@{A@M#XASw5uW|IQkNlhn2lv8b>hJh6 zA7P|oBli2PRf)k^0+VP+RB~4pE5gmZgM+X4_gjiAU+&glIV8$5Kq5E0K1WtAvOZOF z*T*No?RBF_TD&D!fXK$g{vOE>bM#@{yI#b@(>3x~B$DIUnjJ*2-tNxz6*kL!-Tsxx zj`Pg;Zg=R%km$w}IRb_!0xPwTqt}ARhpoM5KNK041%4XuPv{Q0d?eOZMJlv&`ex8z zJeCgU1H|KGB4S(#NgS~MbHrl?#4xEQupd~7cn>xhL);ifOQx;^;d82&WHw%x?F9Ba zO~>wgg0s^-80;y|kTeaA1w*e)iT?3@kt%he|FZ*y-rWMJ_JYR-ePB3Hf+pmjP;}vN z#nu$c<12BCmmYmWo2+#wi$oZ~T%(z0VJevfi97##CY2qN(ZoiQhT9%aqd=Nq7$OUI zG9?u8$`t448)_!DHZ?VoS#c*yLg6`0a`tPAl~BqjN6#RNeF-J>1q@-9oDyk;WKk-g zLZz&^!f>)j8`PzrDiXp^oWs`mS$vjtFVdK5aYYg~pYMR6{W%m6-fy24lav4o8F575 zTMYA}+1lDh<$fhovTHphi{iV?E-hYSC~XovEswOCVj-_lV{^g=NEAtZY>eY~X#(l* zlp{q?vg*?P05j)O%YtdWxC=n6NpX5+N!u1E1_ypVu{LX6vlbhL){kUNPNZx>XyI98 z_A*fvM!U0jcu7OZo}>+dx%f@hx49E=$EJuC&$xYH>6?(cpBo+Dr3rPcQ%7-CHVZd0=!6+^MR@RqVB(?}yU z@d3u(qb3+a0<#R?;A$X0(7N~6bPG!R7IS*`cC&kOCfkn)v`6gznhEy2xUbk~UeOXNH} zW#(y|vA0*{&E(DH^jS*|Y4ZCrr~2xDaojxK?Cl*G$OfpBZ2S3Ld@z@MiaX|og@b59 
zGA$4(yfT$@BK>7G`!)c2}#V@@* zIVFJLX_@qiY&eOIYWf>$S@Tttdl~4U6AY-go=47J2-L;Rk9?6{SJJS8I-Q0-dfwY5 zxRN-ow8}Jo0n!T5(Udb?_<-hPpeZ(pH1e~`L;EE2s>(xU7?#-pd>V27 zThB6D($HjTPnI5*%7~fqUrG6 zo`s%QDT`6%`@lmy*~g<=ER#ez?sK0J)<7=8<~wx}&}oyT+AnHN*jI%#&XXIQwRbr# zLMlybp9x9K0Zw+>f0RZz-=@wjJ;xNj{>rY5+4vx z%4M=upjg#N!U8^JT5^G)M$_L;ZBwt&EYsGLjlz2uk%>m$!^W?RlvA=VyvLqMWVO&H zwjgd_z^+3AkxPgpUA6DtD~tgcv#GtvvUo^#Nh*HzaKIn7sj1C_fmxv+j#Pi}NmnGE zW{m!AuXiO{9fDXf0H$Av)tA=oF=GO_UA>*DUG+p3sAZP!3RxN(=1P+Im+Ex89xta+ zCK62pHp^igtd0W&>$i6ZLSXy+XV;|;;r}h+`hr$pA(^tsw zv-jzC){Jo!o6WeRMG<6RxM|V(L2dcW3KceYXDXLY=&S6l`a8=s5xz$Rl9F ze>T#8iNcOaaYp@o+|UplX5LBE!%OAyTE0-z_l@$}UbOcCX@^dcW*{S;CM~JDCkC)z zI)~H{u;y&!j zV*eF>Vm)-nyROW$iBMOpwGSN3a(?-s1&Y1F(A{vmuC(pcBF-B(7Ek6?CLezpbL3+> zO$p>1%+O$6A-xp+v6A4OYtOxK9kYR>6P1_EMuX%(mB>C$2iA0<*~__Wmf;R{ggyk2 zh9M#A4QC5D^zTIm5sV=Qa7MrYb>`JM&xxAV6_aV%q{Occ-mLOE)x>v(B*CA#Tr#RHv zj|)#7?%muNOB8~Q=9(tI2b7$-b--U?x%ARNf&FJn3MwNau?Kv6saTB34#v!1BgEMk z9VzqrrD6Gwq!4J~wpR0ZBi_ML^$?|fG+zQ9Q;`iP3dlyN>I>XtQpQ3D)&Rg`m4?C? zf?xur=VNw;&1g^VJBLT0@oiv0djjVj_J;ab7ny9DgRDz1f}2dRMy7wdQgMK=8jz$Q z$@=+=8j3E5Y>il6=#B67K`_MAV7rpjYO(u9bAKH+|3FA(ku(w~v)7AH=cYVN%}3HK z!~%I`(%L*@zCBb~g@aIE{ymgOcx;nYALL#f?*xtk(t~RD+Ksy&&`d6^l%0uInM8;b z$3&544SfX*+&pB+h6F?$&W@e1(rq5=|3H{PL?+dvvzyg^ffUvx`s9nj16z~I>$A+K zDLVc+9fJf_{Kk1B%b{Xkh=fq`b(P}lfeX%f#YmkvaMUY>@$Srw+Rl$Is4T(-52AGN z`;&h2x4ajBL~GHh=R8VDQv#Q}(}5;_&MTtodeSCkY_sAPH$41Mxt&a62qJs_(kGd! 
zg^&fPr9i%?wgQ!M5^5ZhFr3koK1nj{y~Ctg4-n?bDNlGjhdSul}NK z{ttlOy8NP|9Aa%iq7oVVOr9~_(Gz_k)5>(M>3dbMl@LmW=tI7-{Tr1CVP+^61a5k_ z0@-$ME~H9_kwQ+1;W$z~y^a%x3eioQxalk$h!mDxXVIvdhC>Ik2b$vU$^32pYPW#= z8!skLJe1Mjc|`>IH0`LUt_N;2FkWV9a)h%1*UA7b0esj<^$Y{zox*CSPSDv0kd#l7 zZ^+z8?xU9TCMEdMp@O;X1N9MJ3K#~^@;gofrTPDWG zo?x=%>zth7v2nO3Yst_T1f{)mP{0iU|4fq8OAy-UxmbFA>7JksOB^QKm?=evR5Wn# z?Tm2Lhodr#0^9rHgOtE)AHkm->0>Jm5hZ1tKFT(Cu~I%}p%*lrxyzieHS8jHdz=JL z->NFZ%$aFqhz4%AR%-(^ofK!(siWV`oRly!tVnUfo)0$oxIfMwHt3iPpaO$k z_{9w;t;m@;tF{2_jrk|8sV$!7&`rq4KTt&Dnr$fMO9{|>-24}Yru>#0zynd-L?>br z=F^EWxMuV)f>}Y);v7;X32Fv651B1F3MCqukc;$hIt)Avd=JtgGU4g|xRvS>ynp|> zGM|E-|7OiS9DofxO~))j5YwzE&e*E!5Fz^-1A~41{oN49CX7@(%p%DKl<2{=CNO=j z$|>)slsOBzGaENdq5Ew843yb|kz$GSyA9DU7Hd!rSQXUz`fG;i6O_LcC_gzmjg9BF zmN39HVQD~d*#wMtW%nm$@Kx;85zyZRPS*#Mrwa$=FWfuT2-L2qI`LJio&@)>L{h3{ zl+9w3)JkXoAM6ChbDb2|Z+8u1$BBJKeR~F*c+({K{eMX|ulqRJIPBKoV>1R(JfB0v z+vGt;V8S_cAR^PI+~SimL`{5}w^0oipSXiaF_3!KkD5K1WAk6W`M-SgfBEMB^3DI{ zoBzu<|Cew6|A24qc%frMnMR?gDbs*zmi_;Xdq%|-K37$T!WSuaN$n2`t;qyOVctkF zV~4_Z0Go#cXzLl>2&5J~OTncV5>j0wTGIr5N$4|zmZ+XNMByW9C`M%Clo~3tqkn0c zBM0ei4}e>Q&S(pc(Ct|l_HHj@K*GuNd;t33{{7)%SDr>{SQgZ<;FlSZk!+2V zlqix1L5Tu$&ztrC1_Y#RwGSw9E*nr{(J7R8M4!cO1;+7` zbi%D1UjbRDg@=t3AY(OsdL2ZI%Ewob>belsb)+7>s7UCS1jE8O`Z}<5?*t-J^(9 z#A;j7Q48dqh#Rjibzn%N zNU~HI0d(PL;VGKJodFo1B9gYM7W9rN&{M!776=XE(=b9reh*U|Y>K_$=E6{wEPa5U zm_WRPKIOWRP;XupslumIR(66AhM_WqhGde8oaC`ifx!a8w%+Gy;6}oh0J0WfKnRSP zc@IEr5{A=q1%e%~fx&M>Pr-n_1xRd9BYsx#{wWv{mno(+ib2BWzXx3g+bjY%gJAj{ zy(W0Au*S{jlX8&X6&4b`-u)!SnMo>PO!6c39`LW`JMm=Mcn#S0S|aitpNA(QjAmeXyFixHBVXPpzidPDbmc;Yw6_PatmXr?TW2LH{UiXpL zRQ>2L%@<=;iI|oo^D0yLG!3cA+q>JAll5j z7P)6CP?eZ>*V58@^H-BhXTwZ5G)RtH8Yd z!?Jatm;yIk@ME$kx#wMUK|sU;Ztb(vtRSd)IX(~v*l{hfHq~JsMQ&yCIf>I18|21^ z8pzDcb=IBQvBzEt5N!)lK#}RGievrr38aM4t{XV_c;&74{VVdT*UOHn)CUCr1&9}k z7*^;BUI_;LDGXX&59lXDW4=n5g_AgSn}hAX&HOY0`Z%Ro-g9F)KQ)z z0v9<8s0w9mZEaOW5{ij{7ro+Oc`ijsVUxcsJ0_Ag2?hOKhD@b#%hqUCS5^5rJ~#gp zQMQr~50e$@b>ws~IQ#XRtH|y|bYi_Eum5 
zb=IHtsSuO;UbJJfxb*jx<63h>$g}s7|G@*QY;R|uphN#WxI5i2w>2CI5x1pI2k`H? zyix>^T^Lckz4{}0Ur6}uD*Q?fpFg07|50EbyoQC7xJ8{XZw(<2C%-iR=&bTUQrMz9 zckZ0{6w3=Y;aP}u44U$9TFEU74}})@GQO(?=akBgI$1_YGwchQy7XRivn`^at@&Ds zKoX!nJkb`{cMwQbgHb!VvghBS*&t9r zpkz)3*;6Ra!SZ(DP7~wU{LYp41v`yj00Pa4nWNh=sF6e$VA{r_O_gVvUYx0F8*ENG z6nd2$04d}EU`yG=GmF?yqB7Ob5P__SteWY%N^Jk)vV&9r?c(AXGns~4Llk9X)cZ>( zr;e&r?unsX#j~Fd8!wTRd{$cgTj1t-4LR4c0q(9lQ6kijXiM@DscbyXxF40jmLOuww(U|K$x>}YOmyqA)aHj`PfV2)qs3OZ@G8Z*9{R?%>HAK~$ROQ5vDPA#N% zYTd_4=K><5pX*ck{N5I&(=dJYOmY(!%&Sbu6?;X-7-EKo0b=1=s?%i&&9p0U^OMSA zdu&K}{TE?`noA)DT!B|7qY+_oCd@WrZE~;kv$kLGu_a`uG$A`L{W`{e_f=zkJwH`8 zTXD2o>B}*v-nT1Oii&1I)spW}WA<{)jm(pmH|wc?llS&*CTN6s5DjA5NpmqC`1QNB zFFmuewpLnx_^pYHUt3IFFvhFj#oUNn#~gVn+5S~_ zOin3Q-)B=0WU~Fh0WL}{20-RsUAUS;A|0Lc>y26w0@SCCk4|0z~sY}cP-6&k-F=858r z?cwq$&+$jdFS<2ivI_MCVa%tkOdneRFVi8{1hs$kT%F1mGtQ!Ra+?Pc9X*dL(}vj7 z^J32`{~}@I4ZF{XBgEPhoIT8jh<>hf?`pnkS`rHnt$*QuQu*6zwid^TcCeqx_-T~( z^V?k>8OxRaVuz%M3R*E+ibhQh;4Y z%e-Aq_UoUn=Lkc>3W;5r8>0KYYt@0mmWo*asA7-6c{{vhHk{zvWGvo$TMFYvZRQ=##@ z4F7W?d_)ENbPhi)!swcLuHuRaT7#Q^k+pu#=mo(;!$ZekzS;hAK0+)a&TAVy0fnAY zcE0%J?zcs-~^=xc5~o zEjMILjE_7`H2aerHl+2&ALXzIWIa-uPtTH&cFD~wqyzfV~a zW6WBC;j=bwI|AS}%fdSWRGdWoM+`X^>_ONYI$=vNNkgh*)0wUl1Ns+t&T!z_sk87F zq^T{wA2TWkqmv@pt#hzZooqF@F*e@!Vd6E~n|0zzs-`8k&2{bulQ<90q+ttu&BFA) z!}ybN%h_puF~&_7nV|uZZL;x$+JligvIC-kJy7p)SXfweqxjWF*nG_&EJb}L%+Vi;q{2Yi?irkxzOy?zbf=-%%km%qAN2+h9pKJPA+atDiGxwtk7!#G=;9V* z`!SjJj4|K^(+4JX=mm%3ZM70=9-7dpM~{50-;#w8H*?f74TvwBLDAe;icu^$qlJsf zP?>bQK{7Kp{!Eoj;Q3F7U##Iu(o`>=UsJr_iijqhy52o3^_8mRB{uw41K%+@0WU;j zE!2Y0Ttcav{g(Ed7}iiKUS~4zfQWf4lPEjNo`r7*8J(K`Mi*~EYe|Ht(}DA+%)Enw zu8oh4;O6r4Y&x^ps3>q2(qlXAq)u13V4I0BZ^+3jD2VOn!HQ*M9B(+`+|khyAM7PM zAs4L~kGVG56fJY1Y3g>924^l+FBz#5T)VyCkA$A6dr}jxIS7W=@+5f^bUXJ#?%^5t z;~TYK68~vQ)SI&tu}8A=S2Prpq*W}=Q2f^0o6Utu)qD7v(a%vEbf&^TRZP`$*h+5^pb!eeBk$bP$*^21hXDFIE8WICAJTe>@^7O!|9Lq2 zYKJNt^K4}D>%Hw0mPdziIR>Fze-5nSxx<@beOk0w2$8LfNjShj5B<@2W-9I74MWRP zXcg{Coz6n|)MbobU^2+@>=~J-#+dsF1qG#6KeOZEElhfNrScn6y>|01G+ 
z7#}=yqNAFRjH^IZF$7`Z+4m~kdevzcP?7fjOiwsfmW7qWK^f}K=2-3E@GJ1fZ7fZ7 z54x2)NPh#fs5_s~EUxp4BImv3g#nhllA@mW+Jcsho6iLQ+ zA(?-1pS8Yk$Crv?jA{QD75bh-#h3dd`iO_GilNH5K`?$I4NieC@=-8}sY;oQP3I!B zY$_yLkU9Sf@f}Z4+sA}>=hDN&g=K?%AN{lF(qHa+K5Omn1g!%j@(T^`@xP7?Yw-Tv z_2S>ey(u=35pL4F?FVb33eZjF58-Fn7nO&zh4n`((T&OmqD>e_#pL|2$&8Gp4qelB z3U1=m9DWw_b&%?OWI}GHjf2C1sA4se9gsGhY4K+p5^sPALPgY(SORN}w{~vn;YPN} zFtJ@P4P3DAGjxF!7;GjNVX!sO@EHWrBw(U3S4&yyM0~gkAz2uTyo9DX46h2=@}TRV z0bSBoF9G>=y00i9f)FDoHmsU=cq>8@6g6~$w*4cMU^1T|gm$r^+SxL+lvt)Tbfj|* z-cSB3%O%$5XoKK{Gv++t(Vs;`k3DkEhI-$v9dn%9x}|>7Xz}z33AX+v`G(1P#&rND zL4Hl9Z+M*p31;>b1{k^DO)-B;D>lZ92;CMw%2^mIsI@8D5eu-A7g6M>On@P`R! zE6-N)dFTIKq71t=nNsTF!YsROo=V_OrxK_?10aju$LPJJ_Ca>*$=uE1R|vdL1-#D5fY*P8Ll(?r=CkAgkf%bJs*Pae zKFQIKtA`O#9kWCYqD3(lhe(uFFKS?r3;@o|e0=P6=i4-a^%?*(C{MRbSWS$=rfrg1UDwh~GGiPQyF zK0ir%f28dsPw4a%eKj;|WaP^$jKfSy;z81D4hE#hTQuzm5mXj^|2J%qhEu=v({;j$ zX-=W{ew*q2;XCORnAF_?y8v?I>&^vyM83zg_i%5w6X0ZhOEn>m5QaxpN}iltCh{cn zEWF#vQ+JTTFNm;{IVyiltpi3y8h&+r#=pbbsV|>;;FCvH<}V9(z|aS>Hv&_>;4Y{aiHI^oi5e~DCF0dR`bO>|(ha%PDGOkhj&2oX3!Mtg^0 z;7^GI#=JcMmM^RZOPbLB%K%U3S52-h3qh&qPi*Apq(t@yTOzp}O3B)Z@ve!jI&2v7 zQjcP1`&}uyD3u?+e+MmS^T2OlT{TIACA08BYe#-sQE^+0^{w-Miu^5$n{9oybLEzb zDFfIqe;Y%X`_aHni+E%=adM=cZ$ChwA(G^*>xtZ9f@zk(0gI-bBiBoCyOos`Ar|N? 
zJ8ww?DkXlm9t^cuiqeF0PvN|rns?|o_#KxYhyMP;mkIx;fsZyfWd9a&%VEvGPu1!R zy6uxO=p5y>=Z76SpK?5Hm?amW*EP^h3#@roRlpc&+3|ymj;t*d;_D_7V-{+joWvQ5 z4n`52!4fL*N3 z;Bej6IA<1x5zr6djtp;E>LqtcK;E_%_kk3HTw;fu$q1*+OEbt>;B)7WJ4x;($Z5QF zmk#hf^-7J4AEWBY5o3fi=D;7A)78p`|B`TV1aFP+j{NCKGUUYi7vo;k!Jp@)qHkTP zT9SDSl3)UC0!tqn_t1~GVvEQ zda?>KVa_ajDuYKbn7AS;^nfZ4+zcIS~|^LxX5CMW8W zZSauem5)6BRY!AVVSZ z*C!#f6Y)6cf(=x>i1{N?eJ|5QYz)_UpE|4AJi*O3r`lVjmX=|h6-o!8F|M93mrLK@ z-}yDvaoeqb1kDxvTr%w3 zEB{Bea!K34yS~-GynF?`%8}yOCS}A)h>j!xpW^uGu9!r+$bIIJAYX@5l zW6LRw6E1t0(pS=q;L>XtOAQ;&rven0syFGVqA-9bYls(D(Riy(PK~o#eE!{wYbVt- z@>ZWpG5yIcr*ZAI$c-;bl;ea^NXRpynPafWs5qIJ|y#<1oG{c&pd(W$0b2!|P&@959TC zyUzn&VfRs&AeM9ua;}|8NYp}!Rntn;6vV3*VEzMe^}e8C2fyW_bI}ky4dFxmh&Rz6 zbH7;HeD`nO8P5RSz(%$o2>LRY;p0X|M(^>9C(tb3WK1Pw9}rf8_7XxA{a)m2m$dvP zGc+{;>~|SG4|YPG82R_pQ))*+2AOSx8Az>1>-u{ zG5zQP#y$~J6J5=(dThOh%l~^u?{Si3PJeEmX0gW4fG~9l8FiD%)Qih&?Uxhx=)%H{ zcSGpsw<9(^BsC-SHXCs2{t%b^ZtU?D6i3Q`PVDU)!jNcn$mIS)PuY>a37Yns1 zcmtDfvEnhz)+nP1jB(mh%q=bW4iDSP_?SwdXkt_9rsO?lJ4J`R4DNk=a7Yp<2ai)Ur{ zYDAswJ{n0zqbizM5Yola;L zmXn89(mrhXF`*rl3^?27*;<&wQ2@hB$`r1f&W7YGv3%iZOP_%ej)a{gj&FGq`$B2T z?;`B8=qdaar*YVU)&3PnYMkC(?ql!l7ZKe9;PmRx6VA{1DfW)q&mq_Q&F$*9PCZju zyzep4AsUd*KdMW8K~(;y4J``J^8CW;qz;}saYgh5oH{djcp{)+11*v;r`~G}Q6f{g zMSSiz@jrK?E7eB8?7-sx&xUk31WM5th&uMsxg8tXe&~n)O!K&esLHOu(J})X?wd*Z zJ3%tci^?}(2`C96I2D!^D{$YWQq{bBRP2(3z9n!bHi%fTpX2QWMU4E5N=X|b zHX3>;r{bK1i+-IazvVqLSp?rz(nQ|3K1*Nn?=r~($Rs!ZC7GlEE3;Vn?)ZGdM?#0R z4{Q+mVZ%Tx75~&Vb_|hXZfqiOWfpPGJi3TDG5ruM%fdc$jx8|;v``ZhNUUx<+wF&Z zWxllZOfpZ1K4tc-7V9meR^I;_I^Z=Gz;ll{BROdNZbbThSO93bSQk9u9b3Ok)Y&p- zpLgZ1Ooa_g#agRE6&$>2j@_FvVuCqu3D_``wq(v*Qp9-mOLCUN(Oo9$9gHovx=P%) z`^{iArZ3Fm9qOBxtj^Eu1&?=Tj|NV#*@QzW!LUWt#J#c%@ku9z)n}f(_KzoIX`iu{+4IBQs&| z62Gm;Ili6mYLy};{J(b%nLHZ{acltP%+y7C_n*%ZCgmZ4rI^7Qs1&6)1t1v)H5IAg z=R|l}%(rH0%wNIik+Sz2;ea=8>oRxpqHd}FHJ?9{T#&J^@T8db>eZ{GKJ$NF^_X=m z{Hv|Vj8zCC2XGYT8qWgw>-=1Z7pKbg-#R7;GWW~w7-=ZVO0|$xQfgO-J3nBz6adlu z(BsfPOJ&B?|J0LV%Y!c=wZLC9?=uE24v@(ZQX}FOD4y|tOht@3JA@*udMb8_I3q=` 
zI)~b08i{?xq2ph=p5d_D_nrmM{uHgl`F z|38>AtL@x5tzrV4q0n6nqrcT>My)=XxNMLxaiKvO1CmSekY(qVwpSR$vuRmb(Ady zGUs1LaS0}=Hq5((2xei=MI_adhcPd9t9Fw}R~c3J6Zs00@OH6dMvU?06RSk+=t3=QMAISHS$vi(X^?g0wB>+zUr{ zh@9EB%MA#Vav6s`MiAgXdA0Hfo?J}%+}YE4Z!G7jZ$U}~Y@rMr{vDGh!xH}0Ol6#L z>7`#(2S?UR>C2O?Jqm%;lk9F;uxU`lxMdf=7wo|CFM6d#V{C|Oj} zG5Y8Bu*N41CJ)%8Sj-$&^dm_d_aZ_S_IN~}MaCFVM*t&N`(ioN2+p+vpdEMK|90Da zV~3v=st3{oaQ?VyxYrIdqBmJrEK=0pDm8#u`^>Kt&&cM-(Us=y45edu3v;1wBNqRbm4qqpJfad3GKbnUgU4g6RlH$c(r>c1B7q!CE=2b=RN@8(S|NYh7&r5GTx$Rq@ z7u8}=gD8p5b{)49d8BMYXi*$O3fMOWj~9E=jjFyZXX#-CFb3vTAxn>+6VE zhGJO+qs1Y$)WG&(qie8QTS-q?{$hvJxu43w`}x$QHtV?0)z>C;7{=3UHoaANr>X=+ zbxcpwc!f|191guoSx|j?#u`;6tCFu{#g83beJz%^GT2WjGdVI|Lhw>{&-qi_oJ&3% z-pNr^Z$D`TW{zdMm$~LzOEfOiniIjlu4r1829cm=*k?AWyx1 z?cDsR9EEL*gpInu&zhn`p}IUur3d{yW_jOI4hBx2Htg3vX_B0T+Y$TqCmTM_dl|Su zB4&@CuiVO60p0?77E6B>I28RRB7hztX&E$Z6f98CLyu@K^DL6nGE2#~QvCi-?_kd? zIq6wlQJYM*E4>7sByIG_PsVwq@Jz3GLGh$X9m+&w`w?a(cD(j=nm;C-35vbFLu$?FqZ|oSf8$L;WshZ;tW@M(?V&Tx}m$T9?Sv98^E$ zMfKOb-PHm)b}t%kB^S@CinUiYnJ;j~p*ttP*(d*Q&*R62nNTG@k%srv>Uh$RxfbW#?IAZEA-+gh$wdmU=E0!co$hG86BAFV-&7%%vk7(X=yyKk^rCW zG5RO=lk0EvBoKN9v{ ztvq@C9%u%Lap#oNTohz=a?0ukuAbIP6--Nw#TgMXLEt!<&gcDAWj&jwR$ zP=)c4kMYn~M3`o0V9z!?C1s-LHmT+kj+YzVN1nM+A2SZZ_I2{f&>DT{DbEY1S-jeb zZ%YG3aAQ0>w<$FA+INq}6TN^Qynt(5hK40dxg20#rPf_tJ*~Kke}eg zmxlOeybXnlfP;*WV;K*08yE{F$?$kIT z6%FotcKNug(4X!~(Zt9bS9*Xe$&t70DQVy?;yh!&XByp}eWp>Jx%;%kVJD5OGV}dP zu+}#P`O%E>!Jx}&j+?J4rR44v7(lE*h4Z9VAxBaVa(FS9<+5HYF)jYx;1Viu<%ZBFU%l(9QdEX}y>ZG;h}2 zTK;Df_%_bfGD{Liz&l0Xp~TG}_nCuloSKrjgbBN}`vS4Ju>9jDG^yrd4&mvHn&1yI zs!FuwUk#OXk(?>lCBC?}1$B~P3Nubt(vK{|Q=jqc)2FZ27aJ(E ziPv*^fyusb_`MR#s&}))@v!$AQeInoEb>Dwt zvm$68W|_=ewK@)M`d{$4w@x7oxBn$}@CE#wvvjtMV)R6l;MWkUu+l_mHMs*N#1dD< zomT6|e38dmE#vhMM#M815?wK?IH?qW*1ZzJovf=1Mdt1U>)dI&2M#f}nlrwh| z%Mv$yEb@t5YPbGA`mwr^(Y9UOrMd&`oBGb3;h6d&d}c`_v>Xxqy9 zPvCpe#nE44gD|AQ-*9@_9Zk!n4UU3RURhSRm;s6QDk+yXMN4a3fdF6kLHegKg*@YX z0*D4Wn%eE{Qq4TO774?qU-M>&OBRv7I%Dc+7TN~8(S6-=VY^%z7llN~D)#vOgfdjy3bZ|}R-r>c| 
z&EH_Y;Ev{DHUC%iffI|DTgNkF7p*cmg;vb%ZIIzfH{E+oRuls#W~ObNlzVr@m0VMKO&&lJd${IWpeV3^E($X-s({D zL{AY`@T}^N`Siq;D8=|;?n-IsS>7*!PZ1|S zV{>oR(xO$*^fo9wOCALF?;SMO`_QMZJP0**$|m;?m1mPC={t3LL>@ZaR+^xyaWYgHgAFI=2{syq55!rh9m zN&eEY;nCgId9GWFeB?KXtx0{{QlGFlWj&`)p1KH+*siwX_Wi+Sw1Kov#)mf_y3ui>&f(R3TMR+~*^e z8+GRrtHX*p$KHftK1mxC=UbhfkOKKfB0L=!e)I8py7TG%To(0J#xo)~GD z5O?M&L-9Z>US))&OZIE8Ah{df&B;sjN6R~(a*4brE%iq`p6D)r{(7d2YQ zn0$JX@4e?-rHQ9RqRfV#C4Ht*sA>XpbuYXqIvPc)&<}Ug1Fu>n<>Hq=dqCB0s5mb$ zJX>Fcw_zWeofi?_9g&*48{s9IJOlLosITHx@YhZ?ovW;kG2jo2KvcyGALu#CP!bp9 zxfjqWaQ6!@BTI7Z5ki zF{L!rQPNKoJ|!H&?2IhDW?61>U~*qd6+L<3fi_7`NrsFhQMbg;lxv{rprF*{wF|c^ zp$X#blr^80l6WuE8XaLf)Sez29UcAT8+v_Xp@Rqr2)LD+n);+&(xmvJa67OpdwJvj z$K|Y2B~;;S_V$^I zXZfO5);jH~{XWtsZQo@iZSitxyEEF1sB*yH7Sj7Yncq%K5bzT|3ny0B2^@#D{v@M! zn?s$=N=M1>J#@PU6%kSs0~^glOP`SJsi$$Ld#0j$rzElfjA*unxT6Jrx}|IrW`~l) z1LKiy^&Y{7TcnK6o7nDGCLvLBBY5~NRXT^&aZ9#Oe!1Dulkq|DDf@fBfK5v+!WCSN z?=9r(Hlb4=X^^qota$6=hg>s@Hz}VK1k_2~OKt(F7nfb5Hc3%Bm*W-yIK%;qQSz6u zZ4M1VM?9aT@zwx+l}n<^&c$9+=i3&qZZbs=Sq)MEX$&`+0}TxD5e#>E7l(mLF%T0i zbKYdNV;c9px!n0yQyJw#b(=_9u4QJWWu|quFHC_Zo|F>w+?eFPjl38{O@h9;`|#3- zsNEC?P&$R*(Y09GV2EXK{nqqpSl-tMzu(+?C49~2Y{~XS>4R0f`?JZWhr`D1JeP0X zpKh15qA+@a+v^6bbPxiS@gqy#FBf5dUt=(pBe0ZI1gu27fEV+1Xx29NtFI;UqEact z6Y(H-upf6ouf%chM>5BhMsvhHtcVySMoXbg8LyyLp9oI{^?^5l5SrW5YpalhrC~7n zwqO}`SQ8`XVKX5|<9OrgUQ)`NAdYno}4xSJ>n;-x)mWlQaFIgMy^u9ynKDlD0=bYa&_g)CnStD%M2};>ktB z6=)tG0W@$)YL?$$!sm0CE>sr!fcJ~H1Y?VCeBm@?|1lqi#=L~rmbbwvr^8DHQO_Gi zkUi0J;A1+CUOOo^HWa z$ct)7sWNaDpBgfWF|Rgm#pPIDl#wfQ#oacDntfQqXKP6J_htbM$4rFBqY4S=&;5ZP zti6PKk*eB#AJD%_yQ6F;#UbsBSS`=Jxgy>M!kmmO>9M4WC|+k%8v0t&|0L?Ei70rj zu#SOsG$dghV*wub($#SU$0MtED}IhN-?;ZOc$s8p`lIV*cI5< zLHjZlTxJ;pgNXPk$kB8S?U+S~9x4xxmz&&Op1I*;2~`dWUk_-s$l~3BH5GvT^Ac_# zsYXcNC;5Sm``lVmb$j4|7(it-=8t;$r^(75B8H3Vedo|6hUfnM@KYUXm4|UPB z=l@7>F@V)V*PvybpV_j!Ix?U6^OMlI}uRCgF z|BX)|2d@Un{=LFkTi_!Ht)t7C1p|a*1IoWm7kTp$ 
z5pHSUB*w#)zfZHAw2O`F(1o*UsW3lI{D#Sla5Yrl)sUnpx-S-vWmK79LiZW+3zyyH2{rH|k-6vdJn1C_*l^gU=u4=6}}6fm`W+i3f_KpX_UPEr;4$w}SAF-ejp}=d<<`z(!{C?~8W7H;GVm^+a;5Oho}kx10Q@1g014LqsT@4ix@4VhfdQ8)j|(E? zjKS?#5ZFuOIvamoM+DbTS1Q4k%%48Maf7_8h1?N4*)s(83i}E6LNSL3Sl589qEEI; z1a66)U{nw21N%5)>b03c!RTovkThIL9ar*OKz0kxT%MoL+z3(@(&~U>uLvPmD!`c= zK+arkcbt67t4pD99ox+#$pPD%PSXa>$@j`zt(&N2ay(Hq{mFDS>HLZO_`@puStdvevFip zUa`hUj0@dlWus+=tB{mgw~8;Wgu>w(t^v5hA;#AKuzI|e*8(vopiR%#*_5#|@eV57 zhfb>v{E>$jAv$t|kYhCYr9rN7sZiHroMl|J+{Pgm_61Ha})u6nV?@1D6=9?PA2}=!67^r$oKqT5FYvk zu~wTo9%EeSrX*)>f?YM^&r*5nXl3Apj3*)G4ihDk!`IV4MG&Guj<4I0x)Y%_Wc&RF zJ$p)uuEfr>LqP{7Y+rc>22KFYq_O5BEFK-e)UO7L5#LV0O7-XfYv@Sd| zE9iMO2R3sv@Gok;?+3I|$lb=+vi!A>JlCd$3DUC~@#;(qkl98+Z>tSz<07EI8z}Z> z&YW4z=SkSSXp` z%ST9#y=v^pkz2m&pCwfZ0R~<$nr^R~(US3!dN`rq6SBLu=-HNS!fb_;;AypKEt)wg#@nktY$ zU@fGn<_jL3q91zZmIu^<;DOEQnBu4(sB3eEAU4hreJ>FgPv{-_t8m`FDFUh~DYOI; z?tCwP@<a2gO*wDR9Z={keLoXeM zL6Nu6!75$M?piByP158XMs`HO&!W16ecj%*y^o*xq{r{Qv$0RlHCaPkE4pePaY-h~A4#n_)*7zA*XC@E?W16%=FHm z*mhOH*8xAV?ae*{@z*Mbl9{ahs^(Dltt)25Z1*PYPSgYR*Tw)ew|h6o%43|+1SehE zo`)hu?#@nkW8p2R6-19`$tK<*=kV~C%Fo+8jj^CS;B~*0kcxF=2^GcuI;4IUY zI*-A&03P!PXaJIPxi)1^&r_9zP8a$cL!vZ5p_OlxLQ@%@+iyMTV}FO2BJ?Jts9yINB=L-n`A_o3!~L z-uQ6Or?Q3o1g(@g+IA=fD^pMAVuQsctOiIp_Q=bHI>J3%Vd6HxEEm$1Lz z8ka4@c)ZTNqj=r&($Kq-ejHNGBOEWu%qit^gxTfqOmKVt7b{;&-_sWtGdUt%@Wk;` zmt5{+gJe81avZXVldi{rYL;HXX%#&mYny^uA_#*FX3Lds5uqS&e9vU?+$dRl zuqw1!e{$bn1AqvVIYm^0nO~m4nZ$O@v^0b_iTcR@r;wM2oYV}je<*J}C35voAg+|q zB9Hrj8MYBXqLW54IDXNSAEuEE&YQ&*5N2ZH9vB*``rE>gs0E~vit{i5Z{pM6#h~H6 z84TE&wl98*NP36&;lDy=?23$1Qk_|V(F&EpnMqjDjdyepEp5QzsW}s0rbQw@I}Ss( zpASQs`8rZwPRHxcEK03)8H#c_^d@76{}QK9XBJ&odh3c)O$M-WD!MD$Gn=mS$vUJ? 
zksr2#smfNE7*-XYnO+IH`*JQ{gZIIhS5oYd zQ_(qTqGAw>mN7nZrhls`mo?Espw1JriI0ohdN{jNv2_pEIRD)r3=bTb`TednjFQYD zZ)>ei47u*ytCfQp*?i0PpW5p(aNw+ftc+*km;rz@qeW}`r58mToOD}}*NaL&(uo$D z!>*R)OUkD>r(EuMJN z-1RK4_{hCBVIQ5L;ef_0LKCX0sRAE$r9Fo+1i0qs% z7yj#CVWB*4RVLx)C_#w$K}J;*m0fp13z2yh*|k%B8?*=h+_@17E2*QASx+9LGD)>F zpMR?+5jw3rN&eUcrb!>@)&NOWCemtAhf^7CS81q@q~BVp=8qi069CCj8LbyK3WKEw znL=1G!kb+H#`#lu8-axOHbmT@J?F0;&8nH|ln|1P(uRGpsck)tOz?AF%6a1|qTPVW z@hE<)Z{p8uOegQm0Wu@hOntvC1uu0>Nh+wdx5=9yb7t!!3i z4ip7P`!zsJ%d)q94=T3aG0nlrFU=#x*zvH zOxpYWh{IN|&4(Pjqw=F`dYe@!;nWJ*!IuWRnSY<=u=bPqP+)<4ihgXWU|&wYnw{7n zaL+*QwjtdB5n-WDUESi3=86{s_MB6{&h}l)5|uedIjpbXe#+!CckqZK3nyim5I@Rb z>11?JyxidK^4v{Us=1ourD0V50d|$xoM8Gl)^WL7xKZ6CK^(`${I{{iy6`q|Hpb}< zPQXj&`HES;-Z72OkJOFdxJA)-LO|cLS3imSL{i#BCdqzmojeI)%&yY!1#*GL4ZgXu zd(zIybEa1`PM(x2=GJz^NIDfOssEYs%ct27fOq{_+?$P zZF=TQmk1RAb~)MDxToS zWz$n>mYdxgC+FWkx%7$Cp`|govRO4m;pdmInH_0>(O2Z5PNw8HPjpNL+8VJ!3{Qsj zs;84oCTjh(OW7ry8E;-SS|k|nJ_d2*L;pLuPMTd+o2O}Sy^n~LW*U}nMVF_g&7BnJ z)g-ptBDAPA^9?2Kqt{-K)oywbve}Be%`p{H zxrZuit0U8Z9!7#D53rHTz_RBg3zxx5*$;1K`6fT!K)zTq-qilBAuX>s=w5nAeiD zjG~*377KI50jh6EDZ??7fE!Pff@vAQrh&zo?JsVOa zD)uceV?z7QRU8B!Uo+Nin;cB+OTJJM_zFR4e!$>8zy#;^*HDeu9A->Cy{5A z=BfeBm7seNB+Amggr6a9{Qm%9_FpnDA=Q5+aPedES z?j*h}gGkV@IH14LlOQ})I2RV2H+ZN}H3BQzK;ojQ;NDrXE~Si_ zG?==2KE+;8DyVL>IFL&?u|<#y7BKnn1M&HTOn5REaPl7H9&U%7px#3Ab4;rFA%`#z zBWvZ@$LmquC{4y3SOsD3pvd7*l{sNWIRsnCu$Afgnybwyd^Iivu8YQ`f zc9&A*G5M%;CL=Z#g~|tDH3UcB2YTZNQ7uDseoL?5=Nts9W`~zaabie#R*!uQuY0P# z8e(!{-;+~7tNDE*Ec)(Z;$NruK}Ip(U5UPw|MVe5dN}f*e=)YAlL? 
z=fFD87Q20?*1PD6N0nPFlHY|2-)3};-CJw5{IIwkj}5 z^rRaUOJn6h6px^?Fqo*p!*=ZF|psfl5Z7q~CHKY$i7*A1nv>kYP zcA&?sBM8(VCv~}UprMpb6E?TMG_+7BnwC@meQPfC zt>RJ{48}Zhr2DMvJix>yLQtn)Su^(o%qhz%_vpt*oRir?#5g;4od5OLU(JWJF%H8F znn86;NUeZEjED%XE;CeQJ2%VPnzh|NjM?*pO50dwFf8e3A!T}mPCGUL4J!rY?;vWX z^XyVy>u+Prp%;gj5ndX%LbDoz74ajBgmvv31G1XKy`>I3nGtZwxQ{dQzsMGgKeNqW zK}sFMJd1`gOP#FcV;^sFzhgqP^8IW07=HLZFZgz$;9S-$JgJy zW|^f7i@$2u??d{sKy%SHc$0`0h;))~&Cxi(20JdhZ&t|64H9c0B)S{DJ_PE1R@ixt zv#HB^RD%21I4XZ|FGauSYFR7|BntaFLrP)avFzCT`_Pd@!&-Dbd~kOMitCwYe}}n- zgmM;iW@#@}*F?z?YwosZk|h-|xwpm4n1=S_`v`JEvDIhjZ?Bwy;6I(h{onG^EV}S+ z>^(nI0fHzI{uou--*M_xjb@%p&feo6#w(XEE1JElvG)tCBziR!eA!)+{}zvTePf8< z7s${^XAw+>cc)vb2X1nwGyH|0UzfdGdYZmnQ=6)X&I9aW(hO_H5)7z>X5dX#R1{rr zl=KopoS&G10ZMPOb&~?uIBUU;askZVrIy@hI~Y`*=lVTWFywd8qSs^lBMTomPqbed*OSmQI9#cs(R`va-MlgUfce|Z;B&mL zp4vxEFI;nfTIO?oft^0LgoMrw9hYNgFNurp8WNvl*DENo`}Mi@*5Ts=SCdV=->sHi zSY5i`7)V@ccn&?{eY#0CqyM0GpR5N)zLg~`hH&7HSG+6_|jqXBkaCMj&-o)f5I9G*3bq%1hb=ObenGKGnX+LT2A9zagB234AD zs%hDJe@tV^^6AqWRRAWHAY8AA%l4J9__?v+_Y5x{<4;iA9=eH8k*o%9o^<1g+lXpIRi~IAgH%{Vb8+%c68O zv^APaq7(}MAdx)cJfQc36fWak=S6RS<`$xRm|~my;}J(nQ_XhGar%<)9|;|M-jjCD zwO`zX&A|E)zU9H+6-@c#8-$c$8gM~a&#qE^5bW7bLD%-QYFqS*5uKb&&=2k(H0N4Wd+Byde>SQe4t9c9fxO)ZSqbxKnG5cehbX(kGNA0D4yBW^laoGa)Y3${t}38Hekc)u z7VUQ*04(@uQSA^g#{Ns6S?Awr7P`pn`Do`=-q!)FQw}2105g>AA__OkCgfhcNOoQ1WuoczDT0h!7 zsLl_0DqZ(Gj@Z*;3{5{#ij2ZzB-JM68*1L4;jt=~ZskwVF<=x@dmwVWN<@yqP->Mk zQF|!0_U9-l8}m0z2!B+V2FwXJew&{d-Yet_?H{6NG)sb@Fjxcmmq;} zuQ+r!NKT;cgEqP9ErkS)lUq$oA9D0RWfcZA)C_KIziwJur^rmYm%gTwZm(-;BY)HSu@=i8_oC#A`{j`ssx2W77% zI3Mc1C|sN;gWw~YW<-RB{w*r&y9RUzJxqU>`OVmexduy?EP2vTI$?et;@YuJPRvBI zlj;Qnj2uiAHb6&7HuH`o~pr^6X1!vl$nMzJHk-=9i8LcT_)2K zc_C)F#>FvRy!PyH=U+-rQBZ;G+{;YVwz68xm=c$cVmCcBImW{TYjA{pG1OspTa99GfNeyhW7c zpKM~)U>;hFRZ66o`R9mZS}x$ViU_+4K?3fG15Revur(z5mEdC&bb&w;shWkRy&a#$ z&#Jf47E~U>10H*87l<*D5q51=RHK>T$d?H)cTfu4>u?J#W;Zf8r>_3kYO_L6vc|SK znvw`WDggh1R^mf2Q^kB_C~HOCl&*{2RRXVbYR5fFr*D96i0;sxo*$jn-MoQfL6{BY*5BirQnK>E$Pbi8 
zpv^?D2i06Nru*cQ9%Y!wX>Yq$)vhj%M057h>Oe}O@$zWenWt46D@l92dPiP=+jyC# zjDqf-VTsDL&4=Gg*~iG(fnYRT(%@5WuiwyZt#|R1Ky7Zb0Uo`FtTviW>o4a!ACW28 zq}<&3$R*yUR}&P99s1o#o@%I>_;Gb`(>#Z`E3phuad`*9+vMRV=a#60mtYD6-&dOb zJ5xJRaJJ2(xizqLCEZy$oe%k~1*g~)0@^^=t-bGCE4qbM6s2P_u%&ZBpYQ){#iOBD z^Ph|g=LsG+{$CQrf7Q+R8{7;&>M!j4l~KD=1L+J?o1~jp&gitJL{eaB>0IG|O+IL) z2qk6RsCqubaKcJ*fSb|1yk;LqI%6w9$YR@*i>yBb0|Hn|IR|eX;H*m@i zC*46u_lJ5oT~@-M}Aw}Z7#%R2!0REbg56giwmVdEZt#gHm z0YGA9<(WvjS+ou&$*~l`!1pSF_Rp64*1cl zV_QK>T4LTz2Ius1U67^kS7xs*3+0#e(;e*fxN8OPG^6qi^4Rg})Di zdaWl!{YYK=Hf!S#ut0*N`jR}mU-LOSr!*~`igDFD@fjKiMl9AK$XCJC{B7qiS|LcQ616f0d_*uO+s=OQ$8sU%_RL(IFYR2qX|WNPb;BX zgcc*+dLsE={wZQCv*CU}O9tP1KMG+BWdWN}!65Ty98F6Td6Q1lzg=Z>jFZZ+*k_iO z{vuSL;D;n8vNXf*xAO2NnP~t>)=A}5=e(TM;!f@*giE3&NI;dy&am9z?&TdoKghZc zq_!Y2O-m1r>h%fD@HcVK(OaU*v{OtMfaWKJEvSJ~UiP2J`rP#7#7$mw+7qVX6_VOo zR@XkL1{*qB37kkeY+5RdM5w{-?}CX$buFu<$;=c*M}{6T<|4z z8BP|p&D^Z&M-f<}+aXFxrilI-8q)l~l(`YsbL>-QZTSaa7PSHYdygLJLb&7)2o;^* z-akVvPq#cZ&*ntAcLKMwg2?Q_dN4-W{?p5-Za>hVTxpH_-RBCTXzeC=#b#m+7M_US?O9>Ls)UM>Eh$HvtC}t6BYEI;rI9srv91!IUt# zAei^dg*ImOnnyY%7$`J`GKgsZiIe5hYG|>N$_)-NFFK`)TonCzYNiDE|DVR`|1pgd z7M@=)*?zw@%@punV){&=YWa=!pLkDNgSCN2yWXemGM)+H|0?4Fq%xTWze?wjzOYjwgbsd0XAGVgZKh-P&7`*zjHXl=$FHV|0D$w6`mUUm0?aZiDhTt-b zSu(#v#kBY0XvLHx8khM`NW;AiWQL}fATwkF7JpAEW5@K9SPy};GrHEUQBQu5r8OKg zsx_q8u$3mXq^G2bpK4-VbS;41=18d&|0kCDpIGLPHOT)H%luC)L)5|l6U+QhEc3@= znNdC=jPMEUy6vNU0vFm)DZLnUA8*i#`Lc*Kj&6in4PK4Vt3BW8Zf8$fA+3kAh@J_G z8&=Z5l*vzH**J(0RK?VhPV4JW4SHTkSvdSBi=F9I2cbSov<7Ps>L%6@BL^tmX-5Y# zSWT?&jc%74I8$=Bd9{FKo)S2|lQh=BU=eV<-&nYmQal(;DXBi#(UvA!(qs7iH}W!d zZx;YBP_!e}GkP7e)p7bv`$^}EJ%^U48ki*#ceMn)yRDcxs9|Pi z_U1Hwv(}%8M%-jSG5nvSHv6w?(*6)2#?6@0TNSWp?=7@znPMnu>${x03sr-nF;}M* zGjhF#xIWDCW|e^d+CRkIOMh&9fCy}un*S9LyZzQsOR$$6`r^`1;LVa(Rq79kO~~2; zd^f8Xy@<|(EjP3J-Z}PwZEenKOV|a=NL9Ik2H?|ElCznx&u@k>EN_^7hy8RZ^6xj> zxKYCgSk_TWwrG)RK8W>vcwk9dlVKfUnLA*tT#i9O;l$0?624|Z=uYI!=`L4sXbL`7 zC$n*t@FZCFfr<0_$pIxJE;a7qP^pma{dYo{fV^&B0Yiv9+rn!U#qmidUr7q@Xfd<{ 
z5|4=+Xx8-K>)fLruba8kd9a(fyv4q8dA6)b$zK4rW$Oj`W)s=^pR^bytyEe_%0w0! z$*QpgNL!~G+n%ZSR69q!Mz|2p~TRx5A95dMc|p}L3f zgszZIUY1j2%QX+zKkxcsaYGrwE`u#u=;Y}E53qNSo}GHYYx(cPM~Lqn+E;_STW*GE z3~6FK@%EukU-=PD*rVGxQaJZr^-b0RZw+z1xOklvu?hu>$;GB`lA>#hUZvQ3 zUlP-b6uY)Ec3*^|d3D$kmyvhR>wWwcd-Z(-P6=F-P5?KwLKj^MdcNGK{+q9VD3nua zPBzABnAwZojb@*Z3(gXHqxfP1wufT9QKEXD02t9gx zBH!mrrG%3!tutcty*b?zGfr@yxY}rQSg4G@y#BKx>wjW9Ryx16z_)E(gBHeJ3oEm8 zN4nOY`%*9KIOyCvnMVGKmhGieA1WA#9(+p;AguwHW_FlUK9{I@K3%5%tsDPKe(HnT z${;@>vqye&%`x|mkZxMHjny-McxH?C^QB zau#~Er5$@M<{W>20xnrO2ujrdpKJQ){cMZK;Ji1JbaR6oSp6J?$eQ-l3sY9Tr=hcU zJ6iKRNq;n)dJCPss6Q&S@Dqdj5S_#H8N5FeE|1kABi)PMLemK@Bau?EIy8r6bv+TO z#60#4PEP7i6nf>bQ6JJkEK?tte-morD|G(ppK135z^(lB*J^~npZ-y(3FrDx|J*Pe z$Kr2)xdlN<@Y_Gq{Eh&8o$(05pGpiagq469#GO7vR& z1y8kyC9hM>E?A;1k#ZQVj0u2~To||wEqFJkz(J4Qs=-a~z<~oY--o|erXSXqK2)cJ>{i}6`oBikPujg{%5_%ApW`vL;#&75t zZ-KXJ1!mTS<_$MW@XGf+;L#_psJnOXjue_W`FMq!Ye#02Krq|@{3Bhy4Yq(gE5-0^ zDNKwb9_Lrr+NbPwXjx!~S(Q(kra*Go&Y6vl&cxK)!~`1B+ilYqZdD`C!?sxs)RE%X zrjlwj=ifr#rwWvt&)az>(k9)1M|&iHbb_1C%Q-ZU{0^EmX%VJfcO9{Ri!6q+9mlVG z;JnIBnF!d(QQVG(+8UD(XN*sl2nY-`$#&%$C-d>LVR!MG&&|Zc6~81-@oet5k~(aF zFpQR(Sy#$-C?WwojLA4t2Rc*tAw+q(aJd%2eV0 zsY!gRZ6YD>h{Pll^d06pt(M<7*3H4-Fl`3??$~>CK234Ezwek^#?0|SGW3SZ!Jf7> zOvQ@L$j~Ij<LPaS*-rJ?Tcq z3{G`j-8f<%fROBeNAGASMB3F*vfbXhL{u|r9Ws&~kW_cWcQ)4S#SQzlHZ&z2NnQ9lvR zzB&j%)o|T7*pvA=r|&wMw%Cj$)%Xmp0Y0u{g_Ro{8@01+Z{2xqc~?fh3fuD*e9K$$ zfDeSxXsZz!t$6N^6_Uq2bQm0STTeWWeT_@5#}fvW`1KM~K@7Qg=&VE&K2(#cj1h}_ zAZ|{@8#C|y^ePK-@U-Kd`wxMZ<&!qw-72`BX5K`+?l6&oELF1WcimK_ynd1HnqvP| z`MM4pk~aS*O^0CaN=$l*^g#amK*_S%s|`x9%;uWCj^d37RN^4sv&6$-g~iKI96v2d z1yi2(3;0Q7e>o{^hF;@AvV!n(bntRB;bIx@K1D`}crdE(-hDAof==_l`)1--fd4#!30<?6`LwU+$BaxG za_BDGjNw$LdRDV@i83*-2%KbxYIx-^2_y%G>i@<$0cd%9D8*ur}r2?)bLAW(BiPqGtRPJ;Yz&1{BblJOXNHX zXa+esmUC^={h~amL)YyrMH+Q{3Wt(kU0t2f3)hb*1}fL>6#8Y=r=)_IH!e@b(#QFq z#Nw2zJw562*VFRDb1SQ*t98+AAo#6$^NGm!m=nL<`4eLEM7)@3PIx3UI~?Dim_YYG zxo5Bag3m<^&91V|g5%*>$`BrpVReR8A$?^0C>k_4g~x-+*YIJ`h 
z4XM#0r{-L&z~+v9BWL_;^($6Dk@3_qX&uGB=);1F}tk_x%>9$>nJ{F^p7lg!+qx)33>=0iB^aW?Khlz zUv2g$eI?aRU)Y3}A;;>#@!7zM=&<+G6gAVcb#k?z@qsdqIg{gp6bV7ClW_d^OgyrF|cK6@!K$^ z48@QX<{gqr0%9$haVj1;s!aAtOe@Aqqvy^A#@1DmWTV}L*6{!xpith3YzCiW4<4A=MI~^&uGsC$%-}gW*WYfJF!8_Y0XqVmCrIVWUIZ84%^h|+B9n@Y!cF77!}FgUhoRdntJf@W@Ya!XTAl^Np_p4I#AXR zGrYy3=>W6l^?XJI4-POp4)Cav4Pz-!`ZTz?#SF$xOMsjxxWBZarl(?l`k5y`E%rx? zW$)LShWd>XiBpRIV1SR8AlVg;4OQFDm9Eo=V$NHmvG*OL?KD6bS<$}Sx#BG@(-wT& z#V$);|73SOU%#o0U0;&CkyVERNgHbCMmD@oTO8^xFsKz=>oufB8 zSEgJ#`uhB1%t78hpId56vLPRXp>XG4^C`?_&2~fg@WSbem26W4-&dwul%(63{x?jR z_Ne#W64e%JSv}T8jeQjtwn*0uskEJ-hEt?Y3LR$w;924}Q5&Ih=ZUeBG^=wI9N-d^ z5X$|rU;NULaF7Qn&EXH!>EZw;B!O6>Z-Z-=LWFGzL{>7m{H;5C8TotI88;zOg_+x{ zEY(d`I00J$CTgASvVjgh3P3N?+4D6DHef0|;nbzk(U^iH^6}(E>3(9UM`Z8vgwtPt zpxtl;{V^qmy^lCe!7f!Nv8BXCbSlG&y?XG3KSWeS3DbJlQmEu6V{FVyH+6 zMk|DisnJ!P35EXhvEj08@*aT$n+7T^RN;;GlXsRVlnJ4GhQYU`@aItk%`h+O%TO5` zgWXZNRWsW8z|Kpzu1z=qN$G<=B z_*)*V#9++UFffRv8bMHM?&yEht>bZwra1t*gpyl-^@UY}I~RUm zMT$vdyfA>_Z3;VnoWU?!nm*0>l{ABqg4rVY>>wJ6g8}xXg-!zrKRuqoACL!2gr9nE zJugcNqjfORX1vGK83k#R&l2*Jdk|27@rHwT%cYsd%ap#*jfP{OCTQ`)e!%O2(5?Sa zb?Ut@nv7_lsjRGw5mFgVg@JJmp7I7%0|FF3emkgNt!!8ojGPuno!=RR#>K?h0+&T<#~F}J-gX{^kVlODpJUvd3Pw_w zUr4UnaxC|niL*Rnrsoy~JN6(v)n+npB0LEvoCyqi(0(E=Yj~fr-=*vDn33pEjsTEJ zJ^cXZUWl^pX4U+rDDh&YB%va>@=`P&5@XJBRRG4#CV*|#S$PCa&4@Qf(E3dGV(Rn! 
z69^!cWbOLCRoT?Mpi4B8G)V=#=?vnhja;e%LIdkrTqWQs6C61DTZkrv1UU3EDH`1B zRDjPUUorWjbm@AWNcTz8JkW5wExl$emR1v@`-35SJWGos)vZ3RcGpOmKoA^5zYY#^ zQb!3&QPt3~-v@?Aj@r^ak+O=yKn<;i9)z0ICD0BtE=o|(JJ#(kuYo>uv01|=U;TC@ zlo#rwQywwZFEGoAjmyjhL3^;Tq0^CYKl57@64SWMjtnqRI;gSyj+(_73T%d&1Iqv@ z`w#+X!}CR%O<{OkT%wE|$A_AwV2Z_Lv{0$MK&D%v%BoBs z0q^w%a)Y%ioed|K{nNYuth){t-H(OoC;*i3$Q%1#p#%C%{L*=8HeEb~H;>5Ea!zCB zAWRG#7@+^`J(Zb*=$-RC#>ZJzfMZy=cMiBG7ty>jJZcbz@8)3FbWa%o3tYt+W`kpi z^vp)dE)Z?#eHAPMQ2A!BrGH>4axh6x>`MzqNVfe9&du{)irtc*jqu7V?Lz(okt?A= zdpvnGJ$8)xVRZyBSZaUO02Y-Ett5eXF}I89=+IvJB8SQ>_Rhg%TUR6eGqQpQUcyP(=Xn-o|Pr72eB@b@daOR6}YM@ih|)pj0t`06jc zy`z~xC>+?XHfI({20{mdt-zfsjt+3hb^!myz+_a<$FX6;7CTJM=84Su@L6WOW@60H zEsj|}5Jgyw&_|{{E7nXG5w_)LzQBwW&|J%g=3SBm*;YSiFSXU{P< zcoTwU5vBM4$|Y%n#g&Dgaff!P=`<0B=#auxXuX?I!)p{iUqD1u@)aopB!6=!+bO@JOU9~dZxJyN~2ix_260!o%tDWLJn14Ghl!Ef z^?E6XFyJb}e1uxV#^tFRgQcZk674Vwe#uktEY0CSPHK(gh|@zfWzD9lhDM7{bj+Q@ ziuQ!R67SFQ#-t@DWWa`71eia-NF3=pLd(ejtNQyN=roB9yAOq{`1gS$jyBA88vblJ z9(;R4ns9p|H>S*Bz(oY+{)rjq5q};D5Joh3KSCPK)J!xYpu*v0>vRq^oS?3-k$LmP z*9%{;Kb6Cwt#>csga>_dl4a|U;)!uY$zEG7ly6xNG80^VH0#wLvd3VqJ>U81w5I^q%g z<(jB6IUL^Hgm4ylcHpd5^6aR=w?uD7^5Ofck|QGa5(u)w?`b95ABKS+vaCvVS^yaJ zD44uf59!Xm`f{6B5jX?WU^Vc|_#8~K7W$1QT6Y|!AOkjsN{Ga`t_osID@;D~-#E=; zz<$zwzjUioyoe}*5!9JN0GOx!$Mf5?s2#Ap!*6-VgFbIaUl(17oKTS<^S+R3!LL_$ z5LRPaRVi3J3&j4d$TgL~$vlH)K8JE~8b+1~inVl55>U|&Pi2)egEQjt-=;|#4)5GT zRvqRt8t%QMJ#%f*S9@%XCGLJZT_y~4d;wSAjtoHwHan`)nP$FXT9p;@wPz6>*#W`y zq3gvNcpE!1o#2#GwEm9twT2M2OVK7!5FN;~!+ou7xIY(D>Sj`2o|-6k?*PwRrful> z94V`CWMU!4Dq>37B1;eX)k8;$HYG8gy39CNekPIk~Eq7QT& zB6wnd?~a%qxA?HaxCL0U3~L>Z%m9(v^mX;W5Isul7%tQC@uhUQ3AZ8DY+LEB{vNm` znvC?|Pf9^Z6iDv!rgQM&l8(wC5y0R-_frA;+(}cSQ2kQ68|H-JvVU^FW+z?#d%C}? 
z#t`$nejwW6VAW~7#Fm|D%=amGWL1@9Iof{>lsBZzPX6c5R!C*EfanFHJ6!3$;x@H_ zp%I=S@0Rqpd>80^J9#RWf9QOpiujVT0a--`qt*||TN#@WNZ$uTdH|rj9AMCj4lGLS zs3l0foYLewFBBj3==`{}X zk<+J!JePP^F22}VnHt%ZN0}09E(%u=ruTth|NXm%@>cjxOo9~?@??8=>YuvmAA>y& z4RTAxGXPO3Tzz@$ZN1!5dZkSito|G2r?9UPr|#4EdB8YKmz%-}GC*F}g)>)ZxsNmT zoIw;RRewW(%GX3@x&HL%p+4*g7s3%tpT&c=Ri-9TX2aXt`=`h3Ax^m+F{5NuD9|Pj zl`!(IrXSbJ(d9J zp+r|MZ7UcCc}IZRGDIrDNO`=Qv<^34g1^SMZ0Yx7A;h^o;^pO5C_kY`84cJ>UK$UG zb)mG{7O+kF(1Hwu6m5`G`h6J8)i{Dm!m&gzQEp-Zw^W4l#5g4rV^f<}!uaDdPiF`! zOh&^d(eQqe6V9*-qU+adf^3IqXcG|#h)V3&FUD(HfvInD=tSVR;cSIijKtW>WUNYCMY3tzgm@yk zH*lU01P!JH7!xYS_r(Ut+P7gV?{-LfNVN%rrx!Q%Gz9YkuCWzzoQHx`Rty_MT|x*^ zwir$f`vy1-p9N27a9Mh$7*Qh#vGl;P1Ob}g?{T6v2gNAJRLK}V1KQ0F$=LpinsF`S zPD6Ifyntc*7>+(uHAsgco>6_YQPFw0DDb!pzlJe9=rHlBG)4>Ej`8{De;$HGRl`D) zwrhpN=@!aaZO9%qCw_Mm?=b8aJ0jk1D)j_o~Qtnn4I)UKa4|W*gZt~5>ku6x#}WxSIub-=|;9^ z5;_pONs_g(zHs@ktql`II+ClU(sekR$pKz={U(_>1CvA*Izbb2Q4GGsT#B>vr)l=s z7U&g7U6-*H#$90whO=>dhZ-U=>m5oQu&XhKr&6Iu!YhN`95M) z3N4HI;zUc7pq+RMJ14`7fpGE7ISkxBgvpgO<&vP)13{u^BNMXco&ubUoXCb%nBhNv z`&LX&C42yG$lI>*V#*ul6X($eUhhx_VMDDMrHKF)BgO}iYYwHUD;bZJWXoHEK5&_+j!DHr#R zq@MZC?+@0J(qa4d?JG{TFdhR{6xnO++qs>;y5b1-QBNBCGq zngjdqT%3!be;5NG+R)5GN{kc7PY#m!PH*qUwB?x67)S;gR3D9)ty%I6S^S7xmO)>~ zD$$AR@C5V5eIx-wQM(t#m2{9$l>dX+R`IOC-~z)5hzO!+m$8DwpH!XHIvm@FjdlHL zOJWiVeqhLJD1M4EDfCNlK5v;A%4QqoOr^Yn7A{3e#$^j6=%Y~$P8zU29HpbE0N46Z z=m-F3i4z~5bsg7<@?(>(5AjLZnBSRnoT(pxdEsPE;LNaD_wY$t4P{4x5HR&}pEs_z zcHPKohd=q%jx7DLKgRO=9cz*COgiSzB`UPq1KjdQCG254=z)))3>wB2Vh?l82WgdJ!y7mdW*!k z|JqX%HEG{)6~rOP*xUY>MF=H|GeFN7nOZ2W1p9aWf$U8qwTsRpjwjgY)N;<-(-nho z{2>$Di*(j0pNj1lD(YZcl+Vb7dkSS;x|kZ(hX@|{i_8Cps-V+$TiJv`9+ zDqIKuB37AMF)*D@w?uU^u^zY{#fVgyKa@H3HR^V>Lo7pDEqG;3>A&cLcnJH5qY^aq zw^=*Y5pN}hesDxZBq8~R_^N0_`k!E25222+HO~XwuchB5|3hpzbRM(%kmnsbZGj|TzupQ)1_#51 z4XYHdpmwOrP&c{Ke1Dr9vK=OE4Tyd(enUW4e1WKMO41J3;O6G=P7LOWdJ;b#*7+AQ z6X`R+jeH_!06xDF)r}}Khx|hj?4}}yGOM#XkhpCl&EI3Y3|ynypC>#IR>ctyjsFZ} zx-wMG8ZLMSgy%8l>xBF*p#~YmX>3fBElBQSIGalbX{EC2}%XCPsvQWXwO3P%=e 
zpxVBx*fAuVWr#TtK10N0xzz*${Ljgj2i1^6I)Lz+K;3=-~zc4O-e-j zS-bj1o?%0C=yMJ z@U*6F?Sqi+w987=TE>#PdnxNHlg*!|g< zxt?wWC>S|X7$`5_LPsf^9dC~JFab8U~rjb*7Vpo%(1c?&^24O z-*3k-*Y>4yfp_Gb5GIIzkpyM2nP-AnmbxKzQbS>*i?cIBJOfp-2A0C4TeWQ>Q&`em ziK`n~n2NKXa@FJ>$Mcgf{gqNF#I-d&2=~{90RJr?eh>* zw2B6DI%YY8F%Anzzh0>Ow)uCR1xZRa4q~CWmU3{+!1YzcVHTCCk%f)l>@=TLJZ{Yf zw44~q10ZAu>0j7bW!fK$oiNx3I~qmh+16n}s3*#EYn9f3xth(03?QSzMVgBg-_hv8 z`L0h`>lF`;#fl6MC4=8ZO73bR_XK`oK90}^&{f5Mv-4R)h(!aeooq({HXfTqV#hcF z*jVjl;yPIlm%9PnX9C}-cAUl9KS(R|TD|LslWB2t*NyX8&On1J`#P(nk%EF6c!hHy zSl^K1jFzlb31}q}PU8k`Bb7+vd-BkOpTg#>+v9D5PaP;WPN9@` zxw|aRkXNSnqd??1ptnA@%UU<_%p%$`TDG=#0dN-@Eia^gt)r6)8}_Q0^$u{y6(@kd zf9p6_bl>{ine04z9rYr*sWPiq%eovNN4WqX`zoQA z9Eb<5^8K`~SLjpn-NSP^G{1QQSMY%2#3UY5P(72mI$ zv~1Lr;OI!!vA06ye0t+UUA3BYxW##3vfmfyVxFvGWcIn8zs8d{w856*8UkmYPs#(O z>XMcUIW>+8*mRw`hPs=HtpTgJflarLuj;_`rP+crkWz{6?g6rx4u`+Ljja;?aW0~A z=&KgS=)B@j0n4N=6k7u=ktWafw7gbOAKgfugpXIPsA`Y_)06|QfcHS4oGhl$UubI} zG+_oEba;Sj*a(Z|x5GkB{t*|tT9GMagU?1JGaqx0ei10(aN4MUM2@u-VclN9g_-hZi;SR}>NZsn;tFbJSWRPxKZpS3;Axg`49d+5 znRwX=z~RGNuT)~WGuL&-Ts76TViSk9E*+niKtSB7W%2kfeYI}q36}E;Pl3@Z_d;t) zc4Iz*5H>I^6KkG>$_1@PIna)K+6FAO&qWlFwY7_1D0gp;va`GmJb$A0VM@EJvxZQ0 z1L5oVn-_pCG~+8?E^OF={iHzA?AdkfHNfr3#bp$tmkjrCuG3k#l#`PUZtZN`(Ze-Z zZXtR2vIT%wnWBN|CM8ipQ2qd@vB?iAzVOVpXLaHq2pzxL2BwYw(qc{D;ZUE};32JL zTfdy<3r2m)*IqEocM;N)ABS}e5O)=`9}9_F=1IjsYH{(lAXLOEmt9~%U;NeX;;CN& zUJDJ98lZ#RYoDu__@3%D3i6iZm&~3Gn6;0)?BMpQ2R%;6zwWK-LIUL6i_=OIa&e#I z$~W9{4XO`xPFwQ}PDM?7VWh)z9|XS74V3>PZT#GE6nx_hSUPP!k(9dYt3)dwVr&g5 zUW3q4o`9`Hz#Aa5C{%=qifbo~nd)l{M!mqSx|3GUz%<1Lifr5vGgA;fhZnm43iZM+ zv8OVZ&D-2B)y)n&OpqsY&M**^v0DXwS8=wgM28>`ik%?Q#{5DfdQ1@vmVx4{Vy@RA=tczm!G+L;>!e_8~yP`R9aogWqF;5U9iI99l zrqA}{wU}KPjx#INdM_=DqXSs69EAILz>A>i?pSzA*5APKE7;Ktb#PxvE5ubA9|}D? 
z=Ss+G??5JT^LW5DAyeb>k{OC#X;bym#LTtA(;N$k=IpR|Lw6ja%NKwDeue3G=^Pk^Q*KFNcxkD%Z zB-5t?+JEn2$6;1ipwugows85evmXl1sa4f1`sFmS!5~A@O4;4Auz5R-rzOa1t-^a# z-e}IB&fw97B8p0Rr~!2(tAa++6$dF!yYTa+BK0a4Bo6t@;PN_qUJyYOC7=*~R5x>G z54ur-L30Csij6X12t-?wv&Sx!kLbbjQ@RZZu@BmT&`edhcPr%SiX~0)$t$HS&-i%q zB43PedMLP^LGyAi=hn+H;nHFC+`f(HX+QgbgLTDTWl{K{Csq3i7u6-hH_c3;vUUX5CC_%c5H4>wc14TFW41O{+UI{-iWeRAg?fhA|j@yvvd7Y zN7Fn@=h)VZpZlD=9wXjRLX;N!H_p;I`BXHF1tvF~_wn3*=9>a|gIANo)_`*wpl6HV z5YswS`{qhycDC5L;)r{2x{}Pt*(~#80MX^~r*2a%2{hh(9gdTcbNa@jR|r0&n3U!S zE$>G$rOO}eEGO2JgtvW@0-OtziX4RZjxfZU0nr{vP*Xk>VW=f(A}&|b*aCn|4JZ^R z!?|R)LgKx{>3_1ZD0qp65jh%v@+NlIKan+rYN#lz&8e{@tpTdx^6rE6JB#ZV>51j1 zA`kD${)(VYHRX~~&^HVnr{BP(znS4WP%8^YBKSPw?9tbe9sRbD-OafZI$pE#W-V%= z8hXKbE#*)hN8yo6VdKM)yJ>;-`YO47R4Up&!2G*1XNm17q7;%!6jg6CDHdi{1kBw7 zO_nw(3JB`=ScK#aQnA^fK?Kuk#1Bn$<-Iq{vz>>mVFNBhf%?U$ zTX{GzB0D^43k70=-H#*r2E!Pb%f}S78RRyw6V*G z(M`3c42B63OTrBo06T+8-k=aux(%HcQ1+Z3t~|jCuih#ag#tXtAxep!>9Ihh6-7HR3`9+(x{)z4>pD(ovh0s?0)`%tZCwdn z-iHx4RU`V&#}dY2P7wz8=`!G5y&qUJ1xm&WUrR|C7ieG%rpb63kZW`D>ZEzWZCR30ZzYGOeSde=L3Ns=69+ zR6vkW{!xz;9fCeYi>F6NK2ihtbI4Y_`obgUVEbX>0}(qWxQ^RQt78y~>Ozx{9%`D+ zLM$O*)%s&i!-+VMc4|#snR>H48&Mwmpza#p{Hs2t!1oFm}*S z^04AUqEk>Rr|nij%rIq;o?8@AYX*A`!(94c8lkgP4J9Qz>>?x$9V*zmX~-Cx~g z6~~Ok#4Y`%@6=Uxtm*hv^sFUYq^M(g{N;<_Dfy`oqKG8&s`6uWw_|#*z9&i*ZD%8S ze>%fP&^&5E%@GF^WThkHB%-|RZ|O-QtSN~>4>V zSZRbbXHxes~lEau~YRS*Vg07^&oNRGg<^|&e3jkW}c*xjLo10MWI&#SjxHxW#l zP}wYzVz1LWEpnTGmB_MjZ9A{iw!=stVYqIg%-=h-qOyq4sPlgxHra&wAiQ=Ihx$&7 zjuUB?!E;D`jZ^``q|*7sWVf0kauZ@1EVgA5&k z-FG6o+bWUfuxk0XI!PFE+9E{hN$`_)+=i9|sJ2$0NJnt6LOwZO&xyVGH{`l7Huop% zHYYQ?h+WQsq?AIalJvJgIvV-wv(1w^M`$1)4eNlXL?wB8G5bfd*D$JiNIkI_RH3YVL4&0eFiRt z6xR8!JK|k(skFc50X5X6x`g6SGrW;VnM-^(a6DY+{gH{*I z6GfuqBxJ6n#Se5;L@Qo>BHruwH6XH=ec)H|uKb=-x?zljQ7lmW$RQ5b! 
z54%D|(?~LpG4U#sSClGOAgI5P*%YcCICip6&WCmX*Owce+s~z`rrnoo8@5r>*jAH` zeJP^np`sUz<2ayyn7?vAGk+-3ARC%{y(s zd+md^;c_H_?>S2~*~nBLnB9cw$5{E-I%VTTh`0*p~*_I5t5VgRM(pYQh#b5eb) zZU4KMmAEkY_MHR9Ps{oXw*^wU5~qQ!Vq8Y=E$GOnV$O~d*6gktOm9xwE4+UFSKI8C3nlAkub-98@_<}DCX3Zn;Qy3$A{BYa z1gVFJ51%{wm0K=!^ijVczJD%&(HCMfi=)$2W&1Ut%hMd9#QVN->m*k&9C5kL>|k{v z3WkSC!~X%Ql$;fU0MdqN#x7jwxL^XO;#9wBehd4@kf@REjg-7chT$xN?Q#Sn{ZM`& zcG85fH;H5sZgV-?poUYek}tC8Ns|fu$_n6jcs>E|@wC4=2KNb!xSQZ}$JNTV^&cUR z>BIE1=W;v-Q|dv&#PyDFJNZ+m>iFT65@S5ge|MPM-T{y-7+xNtJ*USiu;}@IPO7S+ zQY7Z{VyQ%oQ=+JYd&_IOaF31FznDD`aTes1>@_WKn3lTf(%NNGi=@0S{nM1X1 z1SQkZbv?S0C(E>!M0;2OZRCj};)_3)H8xZvo%6Lb*QjEh?t5MCV7iS+_hoB*#P2Sb zy`DDuOH0V^&Myajsz4lhxEN7Z32JdeT8qNZVJ+!?Bw`EB;nql@Y*s8|-|A6=+aq01 z&R@%xM;Y_s%a<>I7)c#w9Zuo12ZEKT7P8Y#b31B<`ln}zDvF27C?%ULexM}qS)(-d{L_l zc$xva3$U!R;U@Y_MkU7Vn> z;<-msq^2^&NGEpWJ1fWuInv{@4{0@iyk_0_+{ZpZFr<3dh9di???r~?Y?0PFU>}H; zZ*KJdMf;H{r1#HOpFxtD8Z$6|;9TlA?we8fqzOH&yPUNK-p$5 zL0Q1@6YghYg0a8j zyJ5cgl!^pn)qarSTAtacpI(17F`^)Qv%bleRcO9vU97??RT~ zI}W#c`(uoEIccBWtpkyFs%xfWE_{m!lVgJ-#Zbc{a4$gWqD<1#CW7+1lU?ob_aL)E z<|M|sf7svTUg$1b0p3`xtI?o} zeN>|AVVCTCG`G-DQ{!i*)!z~`8MSeu$|ndQm=&=bw%hf!h_aYA=FCB77W#7_(hGtp zqHsRh^>VA!wK5;zx6c4WC5?}QHq33d1cq_|Ra%tgqp(4_=JcB3haN}<`XqL|ShG3P zs}2%_kL|DCN6Iw3k>TsT*o4c9Ki%8MiEK#ciRL= z;(LO!xW=UDNf_qR&SIm?WhB%q6NT0OdCG?;oEgqvJ9EuH&99$*!?^zNX+!aM6&atq z_pbMh`La)4Z2#Xcfk#&IJkmOPJgo8Y>9ePKZk~O3Ss+-%q&nC<;bpo+w){ix71o>& z_bV6{?wLOFxX__VA|au@)1mtBINzEz?~kjyQ27hvQRf%KG|RJDWBXNA_gGuM=C)U}9q^;n;=^gm zxUJ9kt}sD-wr!|>XJ>;t&paNP;w0ZDDg#c;?P4~$_$Umj|uk-^^O!Tfy&Bk9FDKknKlJ z;FP<&ufBVybl~Z>^)&N8^X zQeCm7NA$H9PDlENlhF-QU)MjN8#&Eo>N<~n2CQy@6>9*jo5A?=D?zBHeM_)#Jxq(T zo*oyY$V$ZGrIJAt!yD@J2AE6pu<7TntYLfuHUY?8<#Kko&^2qCZ`0Ntx;Xy=s_vUE z_x_&RHCR}YSFcyNVxtkmWi)#l6cS=J6-5G$}(I{Y~N!q1G$2@c$`0NA&h-EKtipHP^(NVXU`qq|@!`8RD^ zglqQ>fAxCuL{vQ6abz3z{|2au*BWe9_`k2i6HMJ$wMHIt)ys#3L_}u8_~+oor!Y{c zyvIjWbKeVb<;JF_wR=20dF6RfhxCx`qN=}?8;%>jteKV7dY_JL?E0Cau=#?_TsTvA zoHo%g_Y~`nl_I|9~Wa6RB*>OE?>BKaqwPe 
zo5p+&gmi4kN#`Ph<**U(lf1Ok+bF+5M44{ka6H9oykeJkNNqza342; z?m4eqxsqbl{;+C9ZF$q*)wrllGd42JjWiA%n8(l0|2zFlznv_Sp6j!h!ML32U+~!V zHOD-xT;XSXeaw#lar)o_giWD&=rsy6fV!;{v!Cn^E4m4!V}Clzs??siyJc zmoG{R3JfBzcCs?pUADEQwmomr$n5vLX}}M>M6KdXvN6^fxCaCT^jdDjYXi)*Tmdxs z6nEnA(nXw{?uew9BEgdQvHqBx@$nNUF4ex7^3+kH3~TcAYhv)~)vF(EJ~f9?i*#=e zRjJLg+`N3ABzl3s(ceV8+yJjIz_sk@o!tVE5A!nS?d8L3iZYwtxFQ|-lPjnI+8-yz z0Y$dUT`k+#W}{k*H3(XR<5VfY-4xhgSC#NxDBypEt~<)*!_Z)zugT)3tYU6%?wsTk zJAkXFX=`im^}YrbK?B~1aUf9K=FiV7h&qpiRu^`lG|D|8K>{6AZUQHLyt2ew@EEqU zr94SYPfzcBtYLYX2qMYrP#{lbP%v)URn5I=)23b<^GpLzB<48CyRf~*qTU6YUw%FT zGL>HaF~nUfL4wIgl$h*D;TMP1!P0ZBygL(&z#gp6t8--=TKer$3xJ8qFp#o>81ZMU z1VJUJf4+X5k5Cx)WBp5nS<0`Ik()gZDBbe&Rml6TH*v#y1oX#>ppDnS=`|g555iX^?HZ9N_%lX}4 zaqbu57!I)^A|Tfh--xve6ZW!qGijb!TnW5tZ@x^imc3j6&01mj8?E74?l|=WC&}<>K*C4x;x~ajEv0hJb07> z)@6NoaJ{me9K|TD7f+ThUoH%B+R<3`Z?EJp_l9b`IJz~}3G5LqwXv{Bu=Stj1&@y(mn7Ef1Ew_S}oOJ zy*Qe)hB9Zl{BCFv;Ffbp+3P4ix2YMotHu`xLZItDwx?c)YW}xX5I$20ppCd#4A!?DKaBH7Q`YN9qAbjh`0}=J5 z%?}e266n(+OH7mv2fAwFT5e}fls@kG`D)ixzM+v3-Ld5RH}Ey53sQ~V<|NR0#I^>{jVP@v#;NT$mLM!?V6@lF9pEI0gdV0Kb z1rN`~1Qo@yKX^0o8Plf=d)%kR`tv8Ir2+awj&=@~oyzg!!NsA>CGTJL^z}U`9D*k3 zOYAw@(s^8x-|EwCaF~~t>3nIfLbs3?uP^nr9DaYU;L|6joIFs@x-rNDwiyZx$T$zk z*bF)DS=W+uvC!({Xu;k3j6FZ?$D63RQR>%k-@fg9v0q7}debiW>psLBF5pFKVS5!& z7E;^Ze*3|Lr7(uDy&u@<1s@+D1B@Vp;Q(4XOdcd=|E;XL2=jC*Sq2lH-IxtF>kz8+aqd4|z> ze`ke+SzEEl?aR%`@?7V29$f@ay$h!J(DQv9q0gTyhJ3FDZ~7Mbdk+-!9g*en@bJ)i zeSQTN!37DC)q;Z4cZV!OZ{%(kYY%AIjaF?MKmoZ;T-Vm%zt3NerdXKkH5VQPNrgJR z#j?l&2pZf`X>bB53`k>UZr;4P_RpU?piPWG`3@rE-DnY?@!cfpXO^QhG7W2MQiICc z4~MI8-$5q+)vH$|3_W6#@QsU`o1Dv+0NGVQJG->BmcgQZgVc2tXJ``0d8|eBj)xVp4heviL>MLB1zSwPUzBwZ!gS-xE zIHe`cYF+%S!Xv(PiQrYm%O>0FZ$S&(=mj%)px2S zG?{G-gUbQ$nFpXmelG;p|KrDx3f9)!TwPtip&Q9MotF%N0kJQ!A}BeXBR?@JfrHM5 z_gX%0rbe{Ze566wdhp7}K3e1ZM08w7N5C{p9wnJu;`tD$YeTBm&+A@S&+>daMm9yvIN z8wkDyq@`C$`7baqFgOEUBNA8c$l@IbHKJ8<23#%3>T5{(=GN5k1`FrnE|ZcES%tN# zGlpJ@b|MTwB=qqy=OgYbH+Z(W3N}i7Uq{zylYWZX6!1kp)7I`gca)vq&N4JS{4-}5 
z4#aJWc&&WNjNdqk7bMq`64`y+P`YY#21lgM)()tZZ&Z z_>I77_8^J2^SN3bj5Q^#j@aD15utQ8KW!RbrLQ|$FLg8e+_<64rViZ$M~^EjDO&@U zQ+=Z?c)}HT6!3jYr|Qu;MJL5iuUBM!6T7Kom`C$lr zHbEILC?q6+NQ$x1uJ4S2gg~(SVhKT7l7GK`wPLn7q2pJkHFA2Hb{saHl~=T)l@aEo zS$EvR9SWwPq3R&NXx>0))orl!yIyDsY}~jIkp4aLEJ5G9<&N{)4Q}MaGW@TveST3f z(vy!&kNxi#JMYZ@`SJ7=hi_7x_`kn=B>ykpR`kG%jfZ2(lqq|`3Bad(P80k0w=!Iy z0exFi!htFY;r7^twjD12JW76Gy@9mR^=oW`va;NEnOQOEivRqGU{QNNevNqh#X}1W zq$z;FhW3V~lr4C)x^Bv*nyQ7XR_T6_7123yPkg;=04c3oMufg?CF78|2n|7yq<=LmmVeKQ-p3eaZ z>H)pNBKSx#%XB35ccZT%q)ePBgIa)7{LLhZ3^@SRRdigL2G5d|b`j<{I5_wviX8l8 z?U()f^(*2?!di+!0d@HK_<|nUA5>C0{XWzBjmz}OSNZpkcoXEtQ!f7v3;trXu@=?p zlFs8YSrg;-s6kzY^BRaNzUFRJz% z5X&T{XJSdjTo*YwI(`R}3xSmC28orRv+cIPcm3YX^!^5ci1NlK zpWhu6!+bjO`NoZi^PB$nNsB@5e>O#8U_5YjxPPWv2oa0&^5sV5O!Ql?s5*V%z@1UV z``cm0t|D4lfl5?F3|BMOZQZK5$-e~oL^}OLJ!nSPja@qdRRG!B-@RZM>WRNBc3QmX zaI4Y(KBh;gb(k%XcfOv!K927*VG9sd=?C}h*)!Ayz64jF!GL>%Iz2X`y|Z(}L?-O* z`*>=QJWeUgTl41DIE}0qWV#C9Li$X0x}po;EN~d_5rzG>gS|lsqpzjsxk2H%+A$=hd$%lEv0{ZS_|Qf>FffV^4l)dN zg?GT`jf~ldwB-&SvH9nI?e!Mi>#b2*^^)&aaQ*gsnfb$&xH^+`n1&h(7iOG1eYzfA zSJGi{qner;gRx-20y15Im_|0b_wL=3bR6EI5+F1CYK{w1r3Jw%$mPAlF^|v)vp*-X zd9xWn`{m1Ee%6)Fo;B-`jGX*szPg6LTf z`S^qD|6^EuR`{#9gL_;pEuF46QxJ}wVU(sKniwx`@h9HuYbrkW5zL5dC!_N$NRJQ{renNF$Wjb z)z=5x`q}&A0@Oh)l?&X##vqaVqJ;|?A>U^)X3d}f9$MTvNxbl18&!V&V!H9rPF2!CN#)M`n4)C0H2FDCZ6 zvmbu=`nPX~b}WAe;Vyal!`V)bj5oM?pp@7%pfA~0@p3>mKB(&wkA|jGl zQwGSZ;{blJKGt1WgJ13|7EfBxhy(Ds^MOkd_(o{GSh?j%FRNcEu58-I=X`5K?_dGv z&72l%kSS`sIuhch$9-4n$9Q-EW4#1t!@Pa?4@(8B!f+v2486<>sBsOAq+!TfDw4%< zTca|x5b`_XfvF=ZN=k0pUea*twZ|_T+8uyl2&vy6vbrBa=%<^|Nkc{b1na#$|qqK9U>*yfL z3tT`DRE|UC#Lp(kc!QN+xNO->PEJk&jEJ9S7|k&+6f->nE5E*tOWOKN@E@#$$jch+ z`Id~R-IGp;-A|BdF8a0i(TXcA$BdjVG&*HYKDN&TQOzh}o|-hP&$0b6GZz0ABCakS zKBcz?gh3weGl!KY*AD)Pj;=1#dm?|p9NHobU;mn}jsuXmTN@|s;S|D1|MhjU4xz^f z*f-Nn96RArsM(9~+yx6BPQW!?=bP1Ah@0AY3UrZr4Cy-f;WAxB9u1%ZUSl0Q&iI5A z(ql_MBaI|9F$Swv52V)H0F{qe{590#9YZr|5Cseu;^?Tp-ZvXPOjB%og5PhV%UJgU 
z9MFG%og^4U#(+XLbQqgHeLCOPtsHrtym?R!a#&IIReCvfY%?j1xe5P1jp4x~d&|pf zp$t-td-v`ol=V9e76k{+ixcp|3Gr78Y9fWwJxA4{fh>G0r~!g9^ZH62;UqOXq>lAh$q)4 zM*8EzgxcUA!D*#zg@fblFRI#it?v$|NCXh(SgWPfD3a7#zL6TD1mY2=$LL?!At_1Z zF(4M(r1{%!TKx_Pile{SWGfQ?cd$~QPr33L!J>eo6F|&4g@JHZE2bGp4rE3yNTChL z0ds=77MJIdI6aS;HwHX+E=q713<_*uOEw}coIcIxm*!|9p8@&q!4QAW(ACRBhStoT>#`@tuYNzHw0G|;#v5cqdVtnBo%49Tz?cL>0jh)c z;`~-IzvvTj?B>C$G7dDW96GcR-1PJ}mV12D(-FHzTtWO_4~ao8Qs?rwt<4Lqp6bHe zqfB6a`mjl1dknv$w&T+7q5lTMHJXoCUtutIAI{C0*xY@>Va|rCuS#K4%q;R1tZdl& zVw*i4Zq*CYI+D#t22I1DN21{ESK*ts^2fGXbMh~r!mwWB|1R`e$s^r!XCe>QnT?NL z)=RUT4QbU|@LT&&t$Iz%23#4x(hGR|Hqtl(!osseede$&;CcglZU)U4zGZi;G-gN; zh{i?P1*+MsE_cQ{gN)@+RV^W}=i=frb>!LiGZc*B+5}Su>J)bM`u+P$ET!~s{;^R0 z22l;s{|qFbZXqy3#*c>{1fQ~S#fr19Q~>&V(9u^kzjvn1e*z>Q<9T~+->p!deNJ`F ziH=A?z5QE-Im3(_D*ey@VeZ{ivTxj#qx&no6P!Y0M zck4Zd0UuV@)zwin3nopob|CTSdbGT|i`w4;e!gSy-(*;jL(`dRe_H-(4&(6Bx)vQX z>pR}S=36ft98Ey?r{$0o<^uT>=R@N#ZZ#r8Owyr;rLPEnk+GZVk(EDIC}m7k%>N?) z=*;D3a-QcRXX39nSk~1p1$=TrJY?oLc;p*oR{mpGZk2q%J8s;!W+DT6pLun4VG8ab z8BkP|c&@DAv?K5wW}S7^6!z-td`h|hjE%@ z%Fb?mls2+fu5Ia?z7KSNI=dxQ-ml%q?>%pYb&2ew#h&MW+1yUbjQ(Zws-bgJqFLOz z1dp5`HpZZdOi;XqojWAD9Prj};f6P%zw;K;YAzo!?bk<~!58VIny&$t*@(_2TvC>^ z(4Th^I{JghlmFNs91>F3)N~zz*jprw5aWCRfVk)F?FuV1m9bVe;Lh*gC-qXzX8;(n zF;Y@fMPB>*``-l#OmH4~ltJ<5i|GfHWZ0>DE{kB5TFZ=0yapSES{v`{|A^VG1Q+wa~fy?+fr1~N`JVtmrw;}0K>nSwIGr2+dk^tst3U@LX(MOR#1 zY5(wiI9;n-Hv=z;z82gyvpn$8utM_C_$2!iJqhOIyq>u&IkkJXY zU?UcM-I4g|h~->ysc;hx!vIvK+Z3LTngD}P2d2@NR~|I&Ao5GkzW<)+H^U+SSWxf* zO&k75EpR#8oq>Qf#m%ikA&=-ii%Z0&eyP0j9LhYbATK@ps}Jz z)4D4}n3?i0c+WoToL}4Gm1l1+n*Xu*c0^kzpDM?al`H2kKv-n(^VLCupNnmIe&z&( zZOgZve^fOl@NW<-y5BFpZqdmn>N_HJF2utl!xC<9^LRSi+fMIHaxDU8IUFSza)ZCQB<j>sse@`MrZM zA%#Wzq3j}#tr7H!;+2B_Gzt5wA+5V8At*@|6&%3^>P`G9H{I|Yq_?+l0Ki=Z+Oy%w_IzgUF zop6KbNL&0B8EMOvBp)l(&n_}sc-2fK;n<mMiTt|s@pn4>lvCdC z-D=z8OrvX(RHqV+S(Qq6_JsYEG`w|RM@*g3cKYkawt{@YG0dtlobrUKH`Pco{0k2q z9bM;_FK5A0)YyV!fE|+a{)vgBw{G2%_$n?g-d6IQaD9g4IADVWV#Ef>qgg!T=SPMA 
zw0m)#WHE>1APc;(^AQoKCkff?9!lrEUAR;O|wAVMaT^@M}@RZWl3?`nM-`jJ!3F zR?pz?yv3c^oSN7iD|F2_!?l)C@vu>0?NEzHt>Md7xlC;viUCIiC6VogbPTR^^t$DA zJUJ0cGKg--A9>F3AqppP{Exx`1kp2nLW zuyP*z!?IuMXQxXqE>K*~`|5FzXZ`6T46K=!2L}gZmE2YWQR(dN4uXr^ z2Awxo)y+~xvUfl|j^h_20!PyKzk`b64Eo8kiVAuxsoX`f1!qudQ0=qPtL_E5kBV7f zMgpaYdV}=3qyYf658{D@e}olZD3sq3&4Bw)scUO94cZDwU+$NaPkNqa7>Uw+TqWZe z=rig*i!M2YlUI7*qR3&EM^U?#7y9I$1w#yj!RxMYhf4$Ib7Lw?ju|bq9`M~bXvCR(^2X}>AKH!PM+G?+wUtCU{H#AE zz4OAPikYkT{*W(ju6u8$)mPn9)-bhGnBY*SuWBv}9U} zye2qqd~01C_ejd|!w6evZ+L&2kN2yO=?Q0dM z^OB#x?M>We+pk)pUn01uyOyQ2eILKc&gIJRohrT(%^>5v?3V3f8jWRMRxp|+w(Vcc zT-@FD2NDWbn=u(uff@=WXX2(U^L*x0WnUi3-zmGex9x1W3OxvM#iEDm(YL0bNws+S zLsmhR)il}-AKXyavinJ?ok5<@&T!Wwq;e14>BI>}9r7BqW%NaF{?+Gn333-}o38v~ z?29U+Sa4q@bOZMl2GCsFr|!v{)-Xh#C{s)DsU4tAEDPs+rZ)baWjCwj!;JQU^tQCu zb2_5JNp83KJ)LtEf;f&MjL%a{Y#JO*L*u?*C(c`nT^C&K!pA5TmbQ3b< zUpKDpDztj&`%S=jW|*xwz2%8&{I7&U4<^qTuFtS@+IpLNA2&d8lXJ%7&kbzV-_=@69lA-#dS%=!`hkxp%LW=im&V>WUlB z;wq)O#hAtIi(d&pUYszWNU;-k{LxvPRHHny&n?gdv;XC2brEl0MWT!*}>hc*bgF&mqgyUq)$0i`LDS(Des<4TjuKc=5D*?Y+dqWBiH>z2>Po?F|?3 zFAwu6ndW?Y*&}15rs=Gr_jlQ=zwWJ*lYf28r9)_=+6KXc1oE@yIH<^rH$ukjKHdKW2;g`>6AV2zYb}|`J&&>elyAMde+^em z>{NUDWsilSeaoNu1#Q*GY5jW9fzyx7ES}I=%C4%nRvM0)9^94Trm^};k0A9(P4<@K zm_f0GH9w6@&zo)`CO^8oW_i`0lemX%c<1(9(>t~lLpg8g{U1hh=y{*y83nrT^I89* zz2@nSAa|q8TjhTGyVxDO%g4I8ArSqp)W{wyPv3X<-~!pXB^P6(gB_C@o>vvQs@6Hu z70Rj4zl6`cA3t~VOYRLICA#w@x^9aHw=ftw&f^IBt$Uar3f!#-=wW-7a2m*W=vzFp5f9X})%4MUL-c1#!{k%+Y{n)shMzAqNHtGh+3NMpUwpP@-m}@+ z1#P~jXH*KDH8S4_KlHRLRvA0ME|gZBJf}OrN|oJNw>taqbDgT3^mPvW%EIMV_ln2Y z;CUJuyOTF*TgRke-^9Da#QDjLnISKM5I0%pF1D7MT-3HctLjC<`ijOn=3O)Q#J(!9 zNV}*=SEzJ`51p>;+O>)N3ex`MCvFkEZzkF-RcMh>=TEGv^Lx~0LtcJ99pQXMMnl^i zv-f5iN`|Q7*%t>VC#Nj#y7e|4E1vkt!V*B46zH&|8Vi1V@VaTV#7fu74{-D@s z;`q)An-$#-(b)?glt!WBvwgq7X709o#B`5OcAMF&%z5T5?$Z83Juml1rbp8WyLL>~ zF4X$6`Y5IO!H3;Xq5}+7*asvPjMUFlam zRE72^E5%e?+{SOHLO>)GgO#AX}iS$y-< zQ}3p;H$x|nH(E{rhz-=)GdXc0S+pyjZFTnQF9(xazIDG_& zrn7M^wf#fEYZif3JM|oBdBu6RU+1gd%N_l088dkjaMB*$`bc%o1&6i?-wfcLD3X*H 
z-56m0#zg}VQk_aPTats=d~+kORpnOiReQAi{UzH{ zOg>EAt6NunkG-;XOfjLJPC0p%mTHVp-n`lfnx`FS_1vktn&l=g8hRyLdrqa{f+cks zvPsr+2t{mX&0yn;epQpZc2?%NzJEE(RE?nf3e%Z=9_K`y!^2)IHQjewKRDW%(i-$T zZ}ElcR)6^1h1pC`YQrWdGTn!9YN=nZUDJM-Ql{e%_*@ZfZa;EcN>0=Xrjw zdXAowk#e0@8Mn-C@8iCth-S)IsHYgQbIm%p&h*-r23M|AXq8_EWm%*Sq|e*8?d4lF2pOdqNBI zLb{49)|#GL&D4@J)Oy$AdBc*xnF`rzwY#R}EJnv->9-EB31nvk)VG;giqKeZ=zZN^ z@E_>LYOU}`?sdXV^v{kS2nq_v&?_G(XgmHj$hczn6l-+wDE$I?#Pp|>GB+p z$LX5%4;*MQdmr%EUFUtrFnW-)dR?BM;h>vUazNlbTe;^{jFj(FZ-q2Qz7QjhXJfPb z6eC@k)I|yns69SDz9;&MCKZ%7 z^+ieju8|eJaFqMg{kQd){QwabV+hc3<&3fCLN*-gHaz~XZyS`$E%2KvEV!(y?shtE z>ewJVLw*i3FYnC$e$_Lf3CDz-95jMuFFfPw9WgcY3Kjy)S5{|s?%Efvxx1fw?jDj| z`mc9sBF~Q7>(pU9(Kyvdp@N*dCaCxE7Wp#)9-OX>icAb}ORN(w@J^i0(M~Is!3)0nH znbkXKjOquM+}`WYR!gy14km_dq;V++hAw+~OL)76+hM|w^z$}ew#Rw&Qa z&7t1e{*pj0SHY%V2xt26!r_)(__o)JFWN>6>-d>dOPlF7#y7cm7_H^{`vV6Mqb4^4 zwJ&g4MZmGafFYkWG;r@pH`0IxfFSWVf!YQ;$SW%g5Y^v(E9hj%Aib0Qgfu|#t8sLu zX*M0UoYh7NeII`_Upd>k>K8NU-=ae2qQ>7|inJQ=Rmkj$Xt^=E%2NezQ1kY7E{2DP zn_U{ylUn*|M2=wle)ZqeW-18L0O-FYYe8whNKm@~`12DKl+OsUf{=3N4+Sb-gPoa} zWYF1mJM1^RYUJS4;wE#^F9=VHY1#VIc7`efg(Oo#KWh*uxB>+A&Kn$crDIn)Xa8*nUBXP?82| zhs`P&_pCnbBAwS4sEvPlb@R~<)*oS^p^}drYy1ji?n+(E?`;;CmszIr_UxaZoH}C$ zc?=OW0+cS40;LN|dJ6`!IO+sbkPm;xIjlbYjN_jrxw*}hV|)uu)yQQfuO2gi0epb5I=_C!TVm0Q%gr65 z?>EF*pNM5Cbx6uq>Rpd@{zc?Z0mABB3{bBv-FRHzYUOqwWGu_bK(bx|3|c2|?t3TE zB6G*2(~^V~Jy+yzGu+_L;#mcz<@PJwFCWt;uQVpUc?%z|-25aiBI9;b^7QQNv@^kr zK_G9%&m>$i0OkXTUWgP;Ybx0F=Ur-YCPYNAJ0hgR_bybEFkgt|dBA)_{v`OjfNR&5 z5dAZ7OJPyb%3fune*xu55jAvKbHY1eZ|-4C=nukX7Lz zVz|g$8qa76oED~hI;TR|j%}GqssvxPENMgtcD1L=uU3qoAos|qy^4z*y(1tAv*8?RS`?@ubz z^3O5<=9yQ7L&t4*A9za^aXDf=f6INq1aAff71lHeZB8~4HBLgQV2b(UIJfvg!nPk$ zZCQbH{Nr=*@({HLC5w@KfI$n(oIu?i04t!Mrd#nEnk@oVA>Anyc{01QCNL0$2xGNc zIVC${m!quc#9z`5zM#x#gZ2SCMi4frYaSWKuWi&*#hKQR+Q{O8+GG0pWx_KU>hA;D zVxG120g0yQ@s$^|0;RRQp_c*$PjqRJb80}SiJ4$Tbxl;SSQOPB;(FIZQK)W$z#vTU z8t@rJ*hX|dP)c%fap~Kr;qqRVvF4z*>FGTM<4TfWyTHGJ@bx%5I2JexJ#{kDi%o6E 
z_RA7srU*?mlH?-_fh$YcnIe`d@qk2`ot+Jhh7ld!~@Th194wYg0Ta;}^=xM)8wVm(o`u6P{q|=55Bt{65R2x(_yn$$XqJ(a^vvbMRAR|RI zNvLq=am^0kiDo>yE4jf2%JtDt;Kh?c2~Hxb1Z?bCz{6Kw-B4BBwQ(e9e*)DBN|2}t zLpTJ(5J;SeM2WB-s0sMpPW;hdaPwW!OR0cRB?9d4epEQ3W6C5V3c`^QLJ9fpAt-KL z_|D{l;R1I1AZUD%n(szX<1~s;T7cr2DDq#6OG?<9J0P6ygyF6VAB!G*CUnqrM9v5G z2rLm6sXtP?I-}>K;;W-J;(SvjZkgVhs~0mo-Z*F~Xs{r`JuTqPI?LaxUCPhsz@f)# zOf@X5JnT^oU0b6>uYD5`Gsr%w(Y7j#i2nh`Qm>Gv%@UBfZ7$(Txebv6go5z0&ojrv zNP?>%0o&cwmSf9C40ZS{o1vg2nJ8c|421m@pl2hj#nGMoi*R^XLwYo{9==fhnkje z`Xmsv(QwAd<9k5C7O{-}8+FA@CH9(EQ!B7-#CndLNM&d{)_T!@3tRidf8$eo+<#5k z(v#weG&FoF79t%0_wxic?I5TJX27Jnmh|=6gXYj%LKyk#z%|4Cmnng$u0`-^HaU1?Pw(nkK>tTn7^})@O*eC4F=9QFgjnRWXc>pUpT6DFPiRbH8pm@+Xv|_MP}NW*Y5vgzZw3v-&Cef#M~u32mo-XM zb(o#C?3XSt2>Y;6bQk;asD&jkE8PZHLtcAy;Q?Dm)aOd_{$~GEfOt9O7%u}28{?ke z938gMqsOE>iH07Mq5G8so#$kI;kuq62`!ho`U#o}npS2<8 zBDhCv;AXm4uqjhSfgaELzh@%tJQ8PcF^@xG%q<`ghF-X>L=X=Rf|dCw7j%mx4!e!lA_yl9$EJa2ja3A-Ln!zZ&-!%3c1Q4?`!Jbx3@RI&Q~;j zc*ln{^?CartK8Zx*5ke$w8>`ljA8A|{+vd99>DrVhXNgh?Vc!`L5{***M*hx5c5rM zX64n>=g;7D@>>FoFwZ-q&3!$BjA=_{gq6xA+`}3rJ9JwRJ0Tk&k|Y zg2D#&lQ3m4s83}|XGGM_qe;W;f?-r;;x3V0#J))GAPZXPd9XnjO$19a{nbPo6w0d{ zzyI9U?52?68s9SyBUYz=-pePc5jVLrewWBYA`<~BiY zw)Sz6RrbRw@S#A@&`6fZH%gg1;mg4@vAsZ#hzQP>c6u4}hxL zLM({b$RPkSW6%G*HPd)CQx`V7Fc>sHL5olYjg!Q0rKu^538asEfPilJ!k{5ShiWun zf({Zg-UzIVnuVK@_sFWIrbgO*eB;en*|4`T#z@W`^n=2gvN?9;teLiU$lhmGxEleb zrKQd8m;as|v~D+kaXiEc#tn1A1xRr6=7MVwaiBkF{F;M}j3}~#k2YRK^(FZXG0W5t zIp*paglz(%4*RsfSOW zJ;Q_ofiPRR2V8|iN?VLbj>68GDre)aRxR4tFi}sc%(aE1bkvn{&3`(&JaH4I_G+uaO86r8> z9v`;pb62MBqzP=An{o5@#@j~pQaDZ>^c4DV;>Ev7D6$Xh|8w0%Bv(Lc{0!Q(`kPHx!$3k93Vw2{*-O`Nl3 z?i)z-E;?V<5-XWq-y##xiV`#gRZI(A2k&A&JTg15e-!=-APP!=9)S?AlSIp+H-G7J z$!)m!ub>9nd=J65vf-;1xFXM*22f*n#Eo~Qs<5Rjh0CU6e8q|tJ^jMpUH1RAvluMK zkR?`f-%$&8-?;n{p1vpOlSgpEO2n$6bklg?)+Wa?ayEfn87s>8v$G?7jmUxvM}I(a z2+#s;K{miJHj*yfoM#A448+=64ox?@fHDX?8sN4>9@guCzkyODG*>JbDOUhJk4vK)2v3h+cUHX2V$7eRK$tr$6rPy$>sa=)B>qOSgAM(?L=#kgDsC 
zhm=_AN_@tNPq~{m0T>E)WHJE=hp0;EhSKaw>;>hAdn15tG;hBdkb#TzJd_Q(&q#O;KF^BoV#yvK<=dJEBQ;Ldjv$h=E>8Hdx=q9k!&mX}TTAUDHchcZz6I&F?TjLlpTnjtWBz$qHqZ0u5&ZJ+J!g-E>S@7Zd3)(A(-)HOp{BZosWkB1Z zVI{Y73w5J(7xOTbd&Y;%>WO_2|FiY~`@7F^S?~_VFHP8DVi=ewQP@M%_s*%#T9BOiGvd1wh1jS%QQQJJIZvl8XuDO84grnV#n{9vJ z4Cj16+)s??5_zOn@YLJBxlG%=Y68$5d8HmCR&dld`}YsCm!2f*Y@E+O5;&A>GDy1E zoTr3}kkNxhSv4s0yTjvds0jIw=j6LS;v(5FDuzj1yyrn6ifB2pzQYzy4&krt2TC{c z2nt$0DG?4XJE@zhWE z#or0UfwSs0v+<>6dGWl|inR%#&z9 z3usOgB7acl0<2Z*d)4xyy}d7M)EC@HHt+l#H?LMgM{&}c$v0wGc?_?r+&Is5K?Wl) zFZCgxL3?1Z4txm~B67!ETqp`BQ9RbzPC&gJ*`MrEDx&U!`Ws)HQ`e-L$7Wf9tvl4OQz*E$H-u@5P9lTQIZ0|JlM~>QxNU~)YxUav`&bj}AWlB=1 zt+})0Umm#fM$mV}X<2uYMx{#PCEV^%a2zuS0+twQOz$#W^zlqMfydn?9+GgL=&|NhwxdNB*Nle1rS#Jj;zXcif^;3B1d zV8#3_D=DiA5G~(yc!;EOPmoI(SQcY?9?k7f?9L2!7iB3WZyD6(ezx%Lh7 zI#26uffBgm)f&9D<4G(5F8ZtGx4PI|mZbZTsjF#w`^G)~H=EACKm9U$=BBic`pLC) z;%SbHv{(&2+bchWGcvE}&KF-U2(r9#Ydl?T4=)Fl(@)Vu$YRUSC}$^D8gwu)y3*&F zpM!>y9La(1Z1s1#rndDlW>W)tEvy;^HgNz_J<1PF6(Ttbj;oAOWatIjEp_v&<7|dU zj~>PSSBD3fvVqbSisO>v_vlD)7DW6Y4TFyYko5PiE6d9*@$5Qm6P4(6bPn02RwA5* z9{qXLGG->GF6>+yP$lD{qxB-PWl|5&e-ptVISnF_FBBDlgod2@`F+*7!N26!9#gWC z9B3{cT#38WmOoK=E2DF#Xx=#nhlofL5Y;aC=S)I&09$2T40E1AlkoQHi3l)h;VvU7$>@ze^>Q!|-AW0_eh zhN~;-@6QEao<&@!kU|WVk+28VQ0*D7yvU{6O025iUXDwV=fOd)P0hBe@sxZ<&~DOx zX5`fm?}+2e_B6RBNxtvetUk$yZ8lj%clO$=j%OBJ5_qZLvNhiz;DfzD zeM|TqkMWxffekZnqsPl4bEQ;LoX(t-6XiYgyq@hgZy!~5h$~RlQ4od$7nvp7l@DBO zby_wM@?pF^bB%?2AII#D7x^p>F4gN!Y2~k3fT}4JGq{RQBw(Xp!-KHgX3u+~{Ymnn z-gg>g^A4|BeZfn5_eWA?L?jRYu8iBO9EgDj(6&U(%OwtZkKS6tYldSlF&umJ8!MT* zSB7#EhbsWT!zSuaFumv;ajRm1n6TdvQgoqMSy_({7JkN!d&)C(!uNG!T6S`n*7fPrF>p(Ijhp{$)6s17JgL=20f%3`d*kP8b>oFY5OZP7akZ0{3df7` ztZyVTvHEbk>IW9q3ty#QD$K0>@JOb$#QIR4y@GpOOP=W2?mMEh-ykP`Pr1;P;853t zYtB?~!-KhZN;Hu-2koxWEnnIw$Jb-FP84g(;^m!AM;iSLhvr|poW1JI%z1u~Hkn_e z@ovqsC7MSL2U7-vSb8yquhWeO71Iap!ek3}iG_QuUch}(Rdf61oEML!cAv>Q*lC`g zWZG7A*`dt&bK|g)$HHqK-+dbGmRg@%*&(&hJI-(#F?-d4m(X((1QP>c&fwa8f*~{& 
zI1kC=A)_F(g{Ufbx)VPJG*C|>;ZH%Glaw4TSkQ96SRIEcB?h%9&L|k~d#Kh67smXveLZ|F@ED{^+fdpTK+?&|Raj`q+W{2h| z3XAy}4mzaeLHf`>WWx{x6KW{Z`i6n@kY*pCt_C?l1A>CvGjrR!m0{^%B~0(z&_GZG z0X?rfwc(N|>wcOwSN#sQbaB79>e>LXX^3p~BYA0P)})Qq19Wb0h6RfBifL;z(EDHiBB^P(p2_V;2;_8uBffD;o#jg@gNh2D1Wj&x z^C~5~UmMLtTmvL>+-&2kSC^LgcO49Oss7Bg$wGt0GtZ)4ms`8ReIR#~U2po)Tiz4x!;- zovg+F1eYc|HW^~cAbuq&^9L+I5(qm;sn51&S*l>>0S%-~UvtJncV$6vG?55?0I}BE zegSs+eP9Z=?bs0riWUSt!8h^DSj1=TLsN@LpEAsxCE-8BhD-K(SfTFw{_-jOfKo@y z*${RWkJhdx_mVJ?So7f;zu;L*!ik+YOE;ctB-R_K+3_gYAe~#$y^o7z_u+&J4$oKO z`^XxOv9a%HV$Hv+u2Nboz94ILZSkq-gyqluStK9guxkG+zvq6j&RrSrCe$7fgC*OzE24-FMvD`+4R}igA&gKipB+; z(5!JUNXz6MfDlI8Ab>?ZO4zQ9dwIbh^CTZ+9X@3v`r|Uk$XPSj zCB6Rg1`S!`2fDfvA%)tN@lB}o;#vAD@`DqrT8>?IJ%6^xQ`e(8z|X7I^z^-rzAF}} zRo;$$Q`dg#^U$e<_x>m04?h4G9m0Vl6hiT(+fnmy&`i>wjE7s zPXnV3(`7-1`#j=mK@}l65UB`wp*o*$=m-+s{@X*uL}C1NwtX(CGMLFdGx=Yf$yw|r zaL!#2nt%HAY4R3PG7S!%lW>B(1+lYhpk;B+zPsb|3x3;?f-xpn-kG?jcmDT!a=zm@ zM^AyS2y50urWtk~q*nw0SiPdz@~8a|6%v3-EGUj?tCUPs`?ARU`FEpLwB`jw${)YW zPpL$yKe%h4?DeU5R&AO0Bc{NmeUz5|madY6i!{sLHP8RKw~kKL$?N_0A zVh^tkyXIZazjIULI$m-5-~*dzyg&Ei*Y$x#)pOd)ac3^I9#Y>tAT7B5*=^ppA;QAD z5(BAB?XtGaOwM^$dM}<{JQ8P2Cm0>KT(m~vm3Xi2M3O;|+BQd-D#|&_Uw3PsjfdY0 z8-@9DAC~FI@b>o#y034MSG}xX^}PJ6f)G?pbu~Y)`y1D3*cDo;uZ?ufYAZ8g+pya3 z>A}UCOCLpQ+6&FYork%5q+J)79Q zd;R`3w-=^aa*DS(J8V~8@!~_xX=Ndc(G#a-LwTY+w6E1w?fufIwLNG0=mCSxFPe1k zj7ii9cO824VX$ciLKB*)(1j+c75PsW&Imcy^`)G=dt<>DA99-Tv*B>5W{uKkt@UHF zC{+o0n3P{I?&ee%7kynleA<6eZ~i)Ox>Zd|Z6-a@CZ5fv^XndM~>cO6CfUWuc<5Nqp3s(RDgTBabT=*)0 zr&U7EZTa&l;>6dQLU0d;2yY}=b({*qG>XKhUA^jH@EES@<9qG}I|hD{XJIH?wadjn zdcpIRrYD>)QStuPSvqZK@%)>bjNHGxW|VCHB`kw*J+OyMaZCdDb-Lgi8o zRSH@G_wJ3CA3I`oqIT=7MU?0wvG)m2Hy0;-(J(vobEGC^S6+$W$F9WMb$KD)SLaLQ zY>sWKd%m{RkD=|84P)&FL(OA%TB}nmPsnR+gZHek|H!)hwvrnTWkUk) zCUQomI=to$+>;(E7(Y{*(KF>KUJ-ixG+#;kTIrLSOrm}4Jiehj*0>g}Q(bzO30W!r zby`bKGU>w8CMs&X!qB)9UT95zg_R8Yze>!@yH80-o&hc;j z8AZ1h-oy-zNNZmhyJlCS_>C@~^VqQq7i*tRyBvGA*oo0D_NDjsTGsu!pEw5In>V>+ zo3GK~`LXgAU)H5UO@;6G7tf5=%(fX@*PWM4yp$p|nh_IT#R@}LQB~HWGpL>-x 
z=+DF2XJE3Ao`*zr!x@RGnWh|@fTdJHW-<&N!U0UQ=mW%_)Cx0sYO^aRXTO)J@xl4O zGCO+aQSaW}Dbn_~V^-Y;Ro^AIg4^G68#l#ok&+Q;+kb<$ld^uV*%B#7PNp6D9BXzI4!W<`;W1)l z*KQRsk~(TU_S&K4P1%v`=_AR-4_X;*)|K57jecXg^H}Ti3EMppA!90!tF}M4HviVU zL{b0xCw+5&(J80g*nW$+Jv6^>Hj7WZ^ReXHU=fo!;dNl`>~l)jjK?v%Ip3f@exxe; z_LC}Td+gF{Jl^Ly80GL3Gg6m)i~K8E2jxMe5(>lYmPDIVM3EMBhT$1V9@Z-aU@-&>pN?d@(djT9|lY1zfgPMSKGxdmdV zh0J_JG)9;+{JQ=AXud&^o7k=g4($TdbCdLDOx#MAQS{!f&TQ2+`&VoyVVRIP5N-y6 zE8mAYIvhF>TudYnkfH4-F)7xin-D)n!fgTGu-`O&ut37=P4N4$pR?VtbmC8Ki#pg4 zBDx(ggo|s=+@(8+xW}FR#&JJ!R2-@!bl^{r0&^Y^4kE$1q3uJ5jH4$5aEJq5#uuTm z=>iS=Td1ZsJ4s(tQv(8bD`*N?Fm4!lz0|>D)8U4H?ScW{df|N#cOdjKu8Q$CM)-E3 zAAg3<`WSd~dxalE``z8$StoL4jv2AVHYy2s;KRfYPL7X^)D&3xdPObR!&{hA<+8@c zJVJR>zG3>ptuG%9_AMH?Q%!gAgigeJ!vhCDn;F+M{7`*oo=3^pjsNnURy} zelBUSD8Ob~=yTWOPPRuQ08Qs*OwsK}YZN4$xMD7x6X|qZA)=AN{iVoCH4t2s;-|)HR(8tw1A7{VGaPHiK zya#lG5mmZ;7p-k}?`t(?4cM>4#VNk-y%U3|d&VbY_!_w9tMc9}cTBclCtUtUL8$kz zo33G9cS7N|DBWjUu87|+U;5zU`!UN|)^r6ta;e*<>#vILC z{aUyBbCOz?xU6p1@7$EqueEo3Lxo0WIpO;dyTK3jUEsZY_ilbTr=ZHN=Tmt@F$8RZ z$k*_D4X20M?z9N|-D)rG`CcAc6vuSjWUcLKMc09Yo1STCdcNSWI~ZR5D%|x%vqMu5 z!gazrN4J4EXzqDF#|k>;J}~!Ni*aRtS3j!2<_zOt5)5`@DqB7F-RUz zQA^UO7&1FC^>)rA)q?;EB-uW?0MY;`e58_Y{NXFORh1a~w-t9`8z^gqO3{4{qy()coYt0S~W=v)4`0W?oRp?9vhA|2BM@~Ig z>AQZyrRdI?Z=vy4&v~fgM`EtG@Vl3_?u!n++%OunDW&~A{Ro}ktrHW% zEM(S!mNg^I<;CZYM;|Yos0;kyns9gN=jmsakBdAKoV(3z;&0k790>6&Kgwt#bqi$F z)FEBTbb#5Zy;kZ$CfpB?IC1LItHT7EtIZfgDIR9H7Wm9Njj^m*gV8|Fw&qw%`YX}0 zw6_U?bnllA*It@p_G%UBk@p+=a`$PatlA>2#y%_e|^U2t!A;mitjt~Ckm_}1xafKD7v;21~ z7n&F*?2fv``wist5@ZD-2%!r35Rf$r(;oVc&zn`AWWHKpzo&2ZF$Oxau(D<=_Z_XD z+AI9zdu!eLuNGhc4E-s^uOe@@VAT`beHli{+%mI5?&O|ey_6C&qy)cq`Km`^IEGqOwFPQ1#g zVjLo&BimgowNIdr|Ja0%oPuOo%O3ieL^<2VeqT=WjJ|yO@#8J+4kn?=nc;Sk2`BYb zyT=;GgN--uRDF}m3U}k>H8Z@3Crs@g2-?)HQh~@}1j@iEU)O+GYBC#T!=UJ=e%pjC zSK`jLJS(y2b6R)!H3O#Q))eS%YxiMc&kCNqHY#+~s+6ffC+7aIXV16kJ+d?Kdiw0* z@hpoIB$DPhM$K?R8w4R#7qm}Lfi;but3Q6%;DF}FIl4ERr-P%r4?eIL3^_2j^l8YD zang|sAME))eCU0|BMLJn1HJW9|9h9Umw%j5dT_Xfo4S8`XwAUv_(JxR#~2(o*o)ew 
z$}*c&R#vJ@AKJpB1@#XzPW#NhGb39ek?x!xU-GfLN1R#aU^j#4t%KL(4u?iXUoxl| ztubNsqnDrjX9QX?n8dctG|zz#2XqqSgtVR%{eFNC3n6AFDGZQo{FF;FwHhN6iy3aGNJGw%lLMv_6NuNsjqsY+&DaCVEj;WJoJD%Ef+-OZi?n z_x8JUob=K2ci4pp9&er}GPyZEw;y2PHXbgc63N~*KiSznp9=8mILQY|y2pYjC z^Yr>!b2o&PY!t9;+nAO*ZJPXa@~{az|MR%BV7K99SPlUN4q8_(82WBH`*Z2Kz<_Mu z=NsORH1bdG;VA3>)KO)zK&`s8KxzIG)!uORKejx3!A_4x1vO=bFJ88Fe_!1_ zGfR)Eg$eucizp}mvR%l-^9S3tIP1oyhnEb76&C$L<~21B#tZj&6*(}<{y&nC2)elm z1lco>o0m5TQXaGDqK6NKp#j2kEa9mJN`{3(lP&FOaa$U#7bVpjDOSt*a!KuKuaF=n zU-OJFjk&qGG}D*%D|C3ej@l(9l`iOY=jc?ByHz%^w08Bx2|W<7&8zm0o_nLrDQV%75H_E;*mP-}H}DgLeBeBZ9S-n0~R*XK9*D1EGUNCf@`O2Po_qiPPmPcQ<)qG_2pdn<+xyGS0KnK6adg-dI zW>i*RqZe55`&Z*dRV^ypb?f{XSpD>5%46l*cla*mxqFI9?%vRVO{@5cl(xF zz7%~;_It^UG?XgYqtkgggLDRG<2o`O%)F9bt&i7RL#*qek1xdN8Gnj0em|7Xw`t20 zNTKc{nSSb+jg9c*soo_;KphhQH*8{dhV6T_%+HY9o*l|okC#rZ5I=RuEZ96XrJ^rk3RoLP7;{6-5%oSmGmg~jT{&b~I zzV!CfGzcoUK}LOu)jb(k^#FmgdY5I6!(Nq--tz9 z%QW|`NEI>pv|jB}sGs+pWwxC`3>uNb)fX46@0t-S`+BkZux_+&+nP+Tr3So63WC6z zcy}JZ_^q^lA^p#|dOP_Ev*`bCZV-l$%a!jeYZ0h z(RJF7-M0Mx`MtsX;})#?QNrpnVbxKh_RRs~JB<|A5lBlW6nlb{--TVX<4cblU+F7R{urp2Xop84!*!7mL{SoiD|DKRi6;u^ytcL*2lDRu%fDu&1d8Ra0 zbTUvt5{&4#UyF$IR;~N)l~X8HI`#f+=)5gl4EF1$IQ!yK_SyLT-5SuvHUy5mVict& zhAtPwESTN)$ZJRlF*D71Z=FEs>cDx4l2Kitw{OrGB9i~SZie+Fv2UBi*`7w>$5uIr zuV2p()Z69QF^%6Mp(BVfXD6Bj{F7mo-8`n%J?gW_-~RIw^5k=6=mW-42F)Oy0WV(< zLNYf7!L)?NcKc^I*3-15qFu9|DzaAZ8ojktfr%_LlkzLb*#QcA#n9 z9{6o;=m9~et!@SHR4RMmDLc@`o50QS*~)=ax^!1_LrO^`xq#HYUlsk0rR}+%8~Dvp zf4^S6zfNb9;opbxdtZ7ajmVZsf~@b{FE0Z40b@aYjwT{DaY2ds0y6owtbCG~j}Oa< ztfURIlb<+6#lhN*~(1lQmnf{+13gNBkA~Knq3xA6Xp4 zX{+D-oc}9cCsq71;sVKA$AmK^GNv}=|5ftB?t3HcKO!xyJugkdnS6t%(>`ZN_aFV- zk2f2;>69}3diEYuAZ}k_j4-OyMMsVVFTIcD+jh^GcFj*K`=1_e^qCZaPPGTo)X1xth^lup&gTx z33v^QC^y8f()WMi(ffp$LLj^k-M{dV_64#`LJMQu*O8=Y3n5HNY9JcSC!|VIMC<`I$qdh&K4}eI|5yO z0Ebd8SGu)C4h?(D@3cHd(WXp`g@Da3E>nZXfM{Xw1JENVGv;F;t7&Qky>!a<~8A;>?TY188j5rZKmWSgX)A-r|F03BFCaz1 ztP+fk5xsH>UO-6nVO6+Nhkc9WyMd_XPrIN+l-$I{NbFX)_RId+${_RG^dB3-rOUPt 
zrY_yE4Kh&JS!Ba27QSp%!s5jdJiPsPKIg_ikV^g*64nPLFLa#@1TG`m7YsE)bl@VW zQfU;C3S)#QMG^c9DCd1d^DEfDqNrEkp&`$gArvxb{A0cKi;DrH5;rIKhw!EpWMd*( zkrUr6Z1lejU?j*ITt*0y8)HoDrGD71G{`lRP%l_40`GkS5lht|8ee5MGWmTC|kAU&nkZ(mot6mx$RBSkGlH_5n3sJ zXjKj)Je)cl!=Cw@ET5Emj3H4sztkeGwbI|&Dqc2OP+AVuOy2iM(;#F|a`Qk^M-iW3(nLeSt6@b0X`p z5|QT(g>H5@*a<5r@Zyl*`OR+oF*mnM(+Srg$ZF#`o`xqLAB!|CceH*To zbS;J(xeR?!EqXJAHL4(RZoh2F|IIG&zttvw9Pq}%Fd&fqgh2BV5$vf(wTpk@2`1N{ z3_d`5xq(!jYKaEkJ%PFE>vi$A5;5iIIBXKHU^OCNqOTz;VGPl8ZrQ{y>Z~0xgQt*2 zVb$G|Fv)1{L)kd{{a+?o}YbkO0NJr+BkU6Z00X0NQhe1g>`bYbrxV zz{dX+g|U5SYS&+}U81Smae~c`Cs)7a8F%_dulmoYu3I=QlSWwIJC%2-Lm5>MTuv)G z7_IN8-@mC&=S(I$1%#7qGH zynvu4p5Izf>h5E99I;vns(3y)BKWVdlr;ODcz~zf=sd9tVJI&s*0q?9L0lOa>qMNL zP}-3M05Wn4-X7$d@{xHIaNlF3(Jv8aH~5fZF;8;S?Bs!suuu^;7%S%gHlVCexr~xl zfd_?HkMvNJbVYw@lVJIE{6m398^C0@H2@Uj(8gH!ZZwb*AsYj=y47R zA7AhImO?GUiSQGA6Dae{q- z{Ur$argYy4kjw_;os)pbMGE#@56qpo&$Ik~yzsXe28|q#aJHD#|F0em3Ku73QRSL# z{|#(B`G0tO^LQ%v?r(fIXwclDxm0H>85>l{+-cb6j45$6DN1Fi$kd?D(Lft%peVB< z6p2tBCsBxy5M?HFWC+iDU7POny`TH}exKj-$It6N-O{%Ab$zbSXRY^oulHK(u};mJ zO=k-at1CP0o;9y;gSl26>%iKC#0SIuRoqJ_D0F1KOxp8b8q7z|`TKC1VbQL|qN_TS z4J4W*1wMuryk?}Q@4m0{jB7cw>z@T2FFds0h=Lp#xrM7!ewN}y1f}U2pBQOlg9!l7 zM@As%@M0g4*n|-~{71Om$e1co0n~65uJq!~vB3+%Q}lf8$NzjK@0r?h4G1v=)SITM{$?S4}j}?ND{-T zi6(1}tXawF5ho-pj+WgpVlSa_odQFSUW)bg^{S77%g$lUJD-b-3#Jflf(<{01{*V5 zn@|$E4Y_H|oP79KX=pxa`RJ^mB?=AO8vcNqi~oB6RjX!@3>j_{2SK;KYty#hkY2$& z$ayq)ikbx+#Z)Ycz>wA!i*;%2a-`NDsKY|-l4R`h2s>F>S&|N-`U$vuU-?h_a%%(z z=)Zuxc4VC6^$jPigNiGV0HTR^KxH(7gGSTAWKV(+9_^-mw=22!H7qAB2~dbs>^iZ? 
zZ{NPfwWtOWD553^C=ks0IZ^Dlbt7Y@!p|K<3x=^{1snz-UJS%&9SB-tOlncli-P+- zFs@kV(JcBzgz!L1y7z~&#A4DRmPad^+!Gs6%e5JiB4kzqHB`6<{UQ|$qS$R5@N-*d z=S$R{O}Y0jJp9(}+gDKBO-+JOj}dMZvj_*>;(I_p%+MCd=rP)HGEg?n`NNDEU%y;v zz0NGA?ijP0P*>fLnDj5$mFhT>yuz?xZuEHM{7G$s)a}ODPrODil-`1N;dyrUB!s5h z-j@Ix+;(hSL7Emgr=HqdN5m8EU^Af8k@~0aR2>c~^Sp?Mha*I68-a={tE$Fe93Kff z!D2o&jDO91x$Ix>pT{Jp_Am_Gy7e@0iU;2wi4q=wi*-?27Ux#AX z_3XOd)#aGob$PR%QdT;2KX5bJ7)Rx@?^I(%7!MI0`sXiPv7^XO?Bp=BT2efn2*hDx|T&enVv0 zmb0OiD(U!IEKu?QoHPx)##6Cx0_cs^D3?2j{}!Ws*=@xyn9)VU!87`()`E(YYijsd zAD*3B|1se(4N9erfQvnS=my_WejbfXK)_;hf>2bpz1%{QKd;aW#Y_?Oaaex) za=rl*?MveE%t{s8Ep#xPU_&LZ6abwfggt4da5R>|x)7!d>D_y^Q9UrQ6GY02iA zL5D!G1z~-Rtx*QL#(GU658s;7c_n3#iaAjd`8&pRu{55~K#ZaayA}(o3{5RrCjs)_ zJv84)-nR#zC+JKv8C=1hcZmRN^dX3kFiL#JKcXgo;%C8Ad%8SwHO~`afmu3knKC>^hE$Pa)4(3Tq4KcW$${ z>yKWn;1Ukke^)wBBX_~w9i4?HbyOkXRC!>y2UqCfhzXT$Yz|G%c$+XyNbwT1=;kO! z?Qj{T3@c6J8a8!fF2J}-3@CYyr&kjPy%W04WQ`KUXf^84MY?y&W`PIWs-f{4I!O?g zL$GD65#n`X=*^;2UsuV_8MRW=j{sRlY7B<0U?Bl;xQV{kum6Endye1{h(G$)hB?%_ z1MxK#nqOHUdspTfOZ-X=>Pz=^&T({bSh8jfjiX!l6D<1*ql+fvd_^t5HDZp!zHQOj zuY0^sxy`5H^b4uO6jNS&s{0VdipP(r$E_>p(|A4&p3P&ml#~<#cx@L_V$pP1==gQt zqg48IO4+Q=IO-BF9tBkm4P{npL41Z;Wu91o-DtssyfK+#8d2QRGL>|lIIFujk*mq-Ei<-8S(~`k`w6zjak` ziK61C{X)2c>O(*IiJGgCm7>G`Z3hVaE5JXQV?MBej!pvr>;~Mj0mZYF$`(LVtQePo zGW=te>7yu|ruoY6pn|3OJUCC{M@y*<4e0KRWmwQ;l!EO>J1vh6oJBlQ%sP2n`&3)8 zmyLj@9av8+bQO(vM$er*%=z0ifJur|5qD8=M;`}V^P;fO!1U97<2BR)2z_->MiKwP zv}J{Wn(ENtz%kU*qqgGZZe#c?lvc3x68Kc6V~#~l4Z6Tlup!uV-LsJD$ZFtV9+q4H zPzyvk@TRgc!yx!>{wro6FIFIgT7OX;J2H=Fp|Zh9zpW*@+=k&8>Xg8=gy5q zW2;ds`Ph#Ryc>-ckFOxnA$cRV6_};n(4Tdz!#%A;;s+>0BWKq1g$pl(Iob_dgOhF7|qQDddJ38V<5&DQ%dxzprRJ3n1Xb^gpc4FD(bMsWC zCQdk0nQx#o<1fMKe0-CZiY(rK%{T7Jneso9Gqt@8ly}*m)(Ot!U#e>oa+Yt?^vTm# z=$=!aaBeDRf9>Glgt@Et=7;XpRcv*R&3RT=RwutE=?2^O_NnNWr%jS;4}a9B_9f_h zW7pGW{=oFv4zO;;=rPFuV}Wf)nQd+Z;CG(F_p*Di)ZyUJp8$mWKa;TmmEG*mra#9Z!H}K`_EK*R!5DwzW5eCy9w0Jp4-yKOdAvkYU z$A2Kx)&$k^Q?Opn-To}*%jvSr5RQ-ri9%!|b7X3XYFxj8xax z5VefKq5C=> 
znQ7%o=v?M0Iz2(|As`lve-ISJy^0RZlB=;^N^7pk_NI z`gRipexYa$Az|`F2MGx6)ZN0%mq&rd_!w{D*;pJC1DGtLsA!5r;#1hNE|g)fSiL$w z%1OPicnr&p`{hWs=U6$ubc;cIqqoXoI=@dk;Xl=m-%b0QrS6iv71b9LU882b)gEAO z-2KZD*Zy0`0ED5t2%nr_bpgT`L1?JaZc#2TLcjrM+)z!;gC+se1p(&iqwme9Q>_Xi zVPRJ2F%nmDsMljHHc&HKmC^F!pV22V286o-LZm{v8&n_n^_DOnsPjX|i}@yGdr90r zUmu@fuv6pcgAph{07WS(EiFwSoLFXa5v4(K%|fg9{t=y2I}aR~211d{HCkZtYjXmk z;7*<)AVcp3n}o|4J_Jtf9Y7)8v%261hB~oWA}u0YnGa)yXw^1LBRcH(nv5+ZyI|^L zYzY*x+a8=WWr_hF)xN~i$6yoFs`=>`l#gvv8O{8rhyk8V2<+(ELmzYyo4$!3Lb$S& zgLIK^Y4@7}u#la*xXep^SSk)Dn9pU2qyo^y0Nn<_nSsvKcXCR>+{FmMx-84bwGKbP zZVPi@ypNaOP8ve5l-{f`cxvnKI8a9L{%TrUIYtXutm$(E7g~%)9l)B`uN`9M7*?Ce z%&wozx&tIP&5pl!cVcO0SE8*J?=E%fC%-(Zm{8grw|#rcl(x0W-jlp>nM?Q`1i%BoyX=4+%+K{27(!SJ@a9sD=XGVxZ;s;2sC6gog6erJm{6 z_ktu$F29pxgJNxc^UyIY7Jt2uuteSpmZF}x>vxSWYsUIa-nLC}bbxB=$#}UlzcEYh z##&(%?T<;GKG++Tm6c14eJYR2m<&#;HVVuQfN&nBkCWA7D&~>e-?HI>S(+)5nb+8O zmg=YnGy*s9W8Hojr$YD_cz6x4w*@H1zKcEHbbh{dX4ezMvT9r?#48g9(3FAe(PA}) zdFFwr$*>-eOOaDa;`7b}b$>x1(xgfT|stsS}T-uVif*Csc+QOf4fefU#K61xE^Jlc0@ zz{!ePQ<-bvp4Kj;Z^mkzJoyBjYsCtr1B^Jcg`q^}dNe4kouP9YQ;1(q=xLnX@`Ii* zGO*tg(*`sFIYM!+Og;{^4h?EcrL>(v@EDGNwd|;V?UKC@%g+tAicVUVn+g-=o3Z;H zpfnwOKxw&BY^KLZWf?y(gmZm+q?fn*78s8Xpo?M6Ilts$#tWsv_S`K)9=lhaVbcSJ z{m%!P6YyY;O4bwK7r3uN^k5TQoa?#Ea zoPtuZ;(7CMxY<7F0aNCYp`RynG!T#HJz4=7VB$Ac?IR*TJSf3gvqGOdktvMOd*z^E zV-p8J>vR43b%{?KT2t-;rI}KUiq>=YhmAUQ*~4JfaHMG+eWjSs6`t|)0=A_(IRJEv z7K$`P11vp03Vqf-2BY&qOa~u*Tp)Y5JF^>EIwByY!eH!lA*(BdmuXJO#nE|$ zCAM$#wa`@QH!2jFte+aofcmq1PF6OPsm)wt0LLnQsDhKxAq`pi!)+tq=lS z{u^W^#}2O7)Z3Tk^A456;n2Q}6H#YgTq!0n^2iF%?$!S5hi%A@#3BO`2vEJ1OO_aI z9}teJL;?i3#q(}BRe8kb>%%e^LD#aVUYdDBw>u*{lKCqfi^GUx;r&2y%kZzZO}dzI z7Zwwu=p$TqRN>85R~41AeGHy+`*J0%g;^9gGiVHq&YO$K$ZY58Lv1?u{EW)`%v&j& z2cfo(zIrl1ecG0fWBwvMKk*Rr&F~*TKTt#foX!&<_G=bt*k}q!soS*qz zmU0k!d=(%Q(eX7APw(yZRrjW^=DD{)p3%h}>7qDl@8ca~0(c+THgjaLR@b0x|;_&6BA0d3_X$HCr{ycx55c7^AN#duq()Fm*7TTx12ODJ& zuA>5q$K%SCqCK7pp>Q3uWlRBd&QBAH2Dfc-AJgb5W1FhIh~B|XxBhXlsUE1C(ZdTQ 
z!p=DEeh!AN5l#d<>c-%=FeMHb5ipFhIB>l-u+n?P4&E`_VGocU4*=x92kX}a$E62A zK;gcGS*i(Q$N)5O9_Y2>U8_Bqn-GLsS<;$`&9I9S)t8umaFY@&Jwt5|qVPlhNt0|# zfPPw=Vq6EJ#!XvvpV(&=6$hmZT=@ZXLc#I$0}yMK-;YGyO?ihnbbgJ%7IBql+#O5J z7u;GS=-MLmfe6CVRDZ}_Il1Ieg6f;e%;)v&h<r9@&G7p9hbe)8PAu;ZJ-P8C2d z+HuJn))1#wl+FGPhAdTm)JvFRgAg^rx3?8`qG_VyTW0Ou@|R(~7@m4O;ckM<6=`Z(}$qRO^2^GVx01pisTb9&~o@dArvEX z+76Vea+urhKRUZ~<)ZU-(oD+2 za`GY2c?9;jZ%Fz^6S!TO{|}+SvA`12@E$|F&t9;C?yN6v3btWsX#? z$XUqT7~Q8wUy|PSs9(400y71O3999TVvypX*8j$BhuXQEd?*ab55ei()3N1!e5%Dn zyV0q-vAZ`Bv`5BB54nRb#LD$x-`;dGs4uH0{bc~UWvX+LIjKwD%UCB7cUZ7s_|69G zUv4lu3W-2)LviYD*Fgu(+w#VV6)wrx@}bEsPsM)jQs3)m&S^luPM%Do;5|r#j<055 zOgQ=QHJ|ZRT>-XRxr}QG6lg>`3#?}dK-*R>4tp26UEq8_17$)%H&rX3u;RC&A9YG& z{3V$6Y%`2a+jn-u4fWmoO^;o@1%^nSOAUz=n{V{rT5DB%?4e%vHAZCCc9Y(j|Fn|QMTf(x1oW7Gkaqhyub?C z9GkN5CC=7kUSQ=nfZ&|W4aH5X_hp3x1m`3m9}$k8K8~p6HMtT2P>PLx4;}KS%w7H4 zx?68csH6QtB^MLOVr^IOoD6I57|3I&gg8^?Nl&m7W$rdzJ)=YSEC_fjNDVzaJaqAp z4U%m30W(_x>!@jH=x^VC9gG%v)w)$MylPvw>fk^noTOX@HAY5HSqs8|P{rKCsT_cS z)e`&&VC(kl+SBM$+-5EZB*Y3vnk2=Q`cBKatq?`?5bzFKsn%$bM_(SPyOUH^fy2Im zKmp9&47T>xEYJ~XYblCU1jW7p8C`iBr`2r0TooU}S;UFptuM}!$OlD$Kw%{eBL@|6 zE>7#1cl9wL1PGznedItRu!N#|p_izYCBQR>i+u)RuK}4L949q3HC^oU$0ysruq+rD z&q^^6H`tk`ZIX zH=MZA=Dv!d!7yO!UF7d6zW`=TE%aa{(4xoX%Le5A{AHQnZga=o^NgrSA0B=f;-&h8 zQtJXVla7J!YJ}|grks5wgl6{H5{U;ehnN-TlAKe|LpHH2kvkqork&?q{wB02^#huyNqB%I=dM;q(Gygsk!3MoQxXLF?b!JY_9vw~3RBxxlkM8P_vje7Vg0o{zrb&Lz)39|h=Mln*v{ z@6AwA!~5ZY6bR5jHrB(K6rhar9tc}Y1Z%4&zckv&yo=jXOL*;@vbLdHnu)YZ%jaF7 zd%6YRMCyrvK5pSi=^aE8@G0nLlaEM3M&cVS+D?c=gQ1ej`q!Ibp*~{R@s(zR)mov1c%8o7SRKYHR48JH zu9r_GHfQQ%*hcxWR}1@afSlKgGcOXk3)#)0dd6q4lv&13bKk6-eCHbv1v0N-W!%c; zp9$V97~X#c6bP>raY*59eetvEDD!{3RCz{q{F!qA!K8D3T6JXuIE<`A&0wI50#1C2 zz>YhBv_tB=S-dx9{uEGAN#&Q`n>n4Lz-&J$BP4$blc1@gIumC=3!5#i2>?YQUwh%h zfBbIXgpLyaiH1=I5K6rxEJLw4*j#L__W;@9=>3K`ZVTR_7>Egxsl`j)6Jp-OZE2>3 z7$`wRxEQS*g+>;J*n;_>zStZfLVeIAPO`j(;eDpa3#T1uXo#PSsQ&XRe7=0*#XP(p zrq{`vF8Pf8?Poq`-iG7%;n+iQMe-G|=A93Mqji*%{>N{Ne36&j3bgNy7lW{+7b6+t 
zB&0qZpM(&0`-XhaL3mcOmOFObEOj|=p1D6>a0I{j;Vm#poa{<{TR5RX^onORqxo?hC{#Eg=bWL}-q9v>`DTq)Xrs zFw>5>y?;_PhVzhV`8X0fiKl?FYi+8x`bz9vSJ4i8Z3p|lpcMVKBVb+nm)bK|p*Gob z(#&W`KjmNpcNLqSIvhQK+29ZU`s=syl-oDHyLSN{1Mh*nF^qZEeUq3(gnN-7^!PkV(_CE>`^EIbEA4ZbxAKuz6x)h+ z!uK#CA{?t-`>_?Xsqz73_a>G)ujMVh$T-+2{= zK-@>j(?gmhsHVufy3L9bTQfZo)C$xu;@O|Ty>qZhD7hd$VS*0>8bGqxgUxrVWp=AF zr?yaHK3KqpxMv8&c|jY-0vpK*&~2lYX9LLDLOOJy1hRJ@bA3j$ zUTXd1;oz39s7ja$55^~V9ERy5;47vz6hatBi2_iv?OSg09?B_GLH6W1E0xciZ{<#Y zNhAf>n!gwH-wKTYl6(e|^3&u=%u79j-sH}JuS2Z#7~RF{>iEe)y_VW;M7Nx9D59o zhe)Aq|90j&btdm~xiWn#d~g9GMPkj@e>(YV#jCFRcN+;al0}6_L%PfvS>Kx!?@d`G zza5r8r+p1CZdDVD;hzbqae!ylmoMK4r_-KHlx57I+f0$Bx(TPLG7l{!wL=eDVugt% zN%af>!;*a*5knVIpQR%ZAn%}y8owe0HPS~bl{K%gd~OjdG3z36_#9otV!2VDuKRQA#3@ae^SvdL2UX>NjM%;p4E?WO93$jq0-wRt#PN z^;YK~Xo*JjV+9kz4F)F^iR0avA94{9AY~GpAWSnv;(!bj}iwK+d`EQI?-n3|UbKWeSm4adr{o;dQmq06xguS%5}%zi>f4 ztB19?NvIoP|9fcregc@k(VlWpn~9Gf?5HVt0$LN$x9jyP1WkHKE;xCkjtpNRe`FX@ z%}@x8uT0{aBd;GK1McUNHqd?R%o!)c@jM*k5ewXY5Wjj_i+8GeP6Pemky0R-X`QIf z1=JVEo`fC4tJLvK3xPZyLl92vmC?Tg48Dm9j^A{^BX=I342r4vR#SY!Ku*DFX>ra@=TTIuE#%oii zUf2kIWC2E(gaD7>lp4UN`z>;;Ut%mi_wiJMTp(_vLF&BTAJ5^UOr^oOmbtRP?fBtk zK#PvAeqLUI@cDg8OPGJDG+H#9{o&9$LWK}{`5e5NGk_ST2r~x1VUE^8s3aU~Z&=u& zt<#Or2sC@v#;zdBPn*lL8^XC1LkLtCuH^x|zzJwvAr2aIS2TeVmfO>PZcLM%(_Td5T31i`e zH44PvS21s)+&<%%U#e0b>`^(kinll!cko%rDf#i#0&X*F5$bk6878#_TumPgdp|F4 ztOk<0M3@ZT&WKcgx_K2J#^$ovM@iE zN=ix=5C1goe@biAb?#h;1R4#k(u3y)k}U6DHF+bFCS6^5%sU58n7?MfMruET5hztM zJb77umTM=1MNk~}P^emh)LE^$rZ{yZtn5QC`K*Ie8;@@6Bby57LJPTLT%{ zBjl&U(BikAjtFS?D8zleqL4M=OQaSNX3O!km!FZzy6_F&#tMeNugPpE62A6d94r9I zLo_Btkf{lI>NCVE&VoE(>q8wZavu+1u2CTzMF=WW>oLYmJorBI0d8sAodoMC;;gy0 zbC^M3EtyhW zdWN`e zlUvCALuJTfGd_SP9>sHC9v$Sf8n`hxwi#L)Ifx`R3iNNR-p3r&-TRqe zP8Y(}VU8*iP@kiYGCvAD2dTR2mBXMmksP~1mx5eP6xbtoKc4Grn*>i<-5tc(7na-0 z&j6CVU!9Kq5&Nf(*oaP@-n-faq7w*8VF>o286^aOR1|YG03bXzg zF8uPGRjNoWuFxq%s>IWlcaJS4&Bvh$%HAIzfml`4EHx$mFz{+$<=JVEPJOpV&}v8w z3gVU7>4$h5AqJ+5ij2Vez*;t9nxnmSonN{2ce?{1RVtb-$HCv`p(%G5;x1#7)gTf_ 
z7%9JqXYWp00O!+Q(DIjAvZ89~*(Dp#A3zkhBX&0vlOWSZq+K6!{9WXu0L@NoFaVU* z;|6O*3=-rZpK=!4ZpZ@|ljNvrY8E0M27h&#Wx?=o$_?L;){Qt)k-Az_50Z{ZouD)L zxF8zm0M0d!94UaMLf3n}NN|b>_QD|7-)iR6?v8FPSF0WfGWV+7UJ2h(n7CdElaBu~-1y9@bs zl1)%b+kWte18Ep?E>2=MT-~zh-IVoZfv9lQW9_V4 zsrCO5@D)K4cq?W*aGHIxD@r3g{`_;Bjg3ueC%+Gp+T-Q;1U5omHnwhxd`8k3?P0Z< z{xR7T5P`rQR(ufxIgTm-+k^qI`#x;&S;-2LkhftX(%Fg*-ZpiA`6aiKGTn&U>vNa* zqt=+5|7&SYB%>5d1!pDQK-dUK6alaw2~C70LS=de&XDF~3m_lZy{DErK0X@jZfSzF z%}zjXWmT??Ny$p&`QQR$hSNwxVlxsC$$(I$SR54@6v`lA{LU%E3O69DXb2q`FIeQ8J*3vpj6aQ*Mh=KoNIcT6%2ldE_uFE!sfHoRbKq(h_GN41%$hd zwRlPK>Iwh^B)sDX)=d8ORRZ?aI60nMtQ_O=Y~R^~H&<&GNKrnG@nL`|6eWQzYU&JG zR#jEy3k-<>2UPHfuvp%Ho+3YBl3qL;x-ERUy{Ib|oJp%YfGn+|r&VHUAu2S8QAbcz zjDi8dnKQXft1$d}8$4JS+9y2uYK9CK_-WqW)P-AI|&3XEQnN zmA|{8?4(lHt1<^v2%xSsAQjddOxL7e%Kp53*?tzgIv7F{_!BS6b{)oaaEw(kgowqy z=|N&7j1WEF+#`Jdc$~@{gzV>j*vKp}tr)+WCo)l#4Mj{v>2#yse_RI3=-P`#N5Qnf zIy;$9;}J<}S7BJ|0g#IU9Nz;-ySuxuz<#FCiPFTVHy`~<1N;V>z6vf}_yAE34we-- zOT-;Mt#?=+K6t4WblZ)eo+X3#5PiAQe~51MSQ&zrCV~*C#14l*e+9xWBrGVgM9FxJ zNUnAK0dp%be|{7eH5`&LKs!AIc_d7u3^L;3>#vy82y6VUHVq1r^Cu6nuv>*Z{O~0-#H+v8(CxTr8e!~OA0eBI3 zH^1f{=)E>iQ|$RY;mma$_a-dd$A{);2N7^a;d2$=GA@Y!Mbw#d$UV(HzobAKfEA4G zO^cf2(CDo_{&oA2^E`Zj!D!`n_B9P+tzL$IOFvSdgHYD=)m;2geHBE9KGrGin+ay& zCb;b6@@V)_ihn`aoai0^xFqein1saJB*G)4E5rLghC?^pLOV-`ebT^dB}QOzxCd`K zqOO@XDh`^8b^rNu=faVkBWD4&bA&QoYy@O!gtS_Ga|}|PB$8#+-cODOL0lt%<8E~y zj4rF=F}Q7uyXYVqcJ5c1mD%|iY3V^-idLqYDSzds*su@l5Y zA*4ff-3Z}#eL;DE0zJdEAUPFK0p$o#4w0IqpCa57mYo#x!kpQXCV8k0{Cvn2(tZDo z3FplZfKK2=)NK=EMiQ8ruSqseVmpA(ci8s=0s_HEsDy*ZrX@xoJ(^pLAx2^x6dG8i zT8lZLpoF7~y-=QD(V~QFNd5cBzcJsPg5yt9`zF6;7+!~OYfm8&^BB4eOpn?K_l{Jp zYl0%+#CzAlP9eWl>kogdc3ObX1wF8Tz7otWxUKi#LIe8Y2jJ*rXxuHA5_b(|Lw$Zw z`Q3FLXfn%jfgUdy;*8f06Ie;%^&;~&U2~ISFRydyk6YUCB1zeT)$^N;jbor*=IrEh z$*97{_I401s0w;rr2;7!JeGnYqq1TNIHT&6OXZFDI*8$vRGmQ5WoOV5lvbh71ZXyb zFy0dh_njiLBz8bNO19(^;%BFYwk;-qkH3xTPU}+C`LQ|RT)H&5B7%Uh(I|VtWG$7P zoh$YlK&t_DmTlzD#OUln>~0yM(nISK-N#@DqIgdS`@+QfR~tOwCfA|^ 
zH=}(u5ZEkoehUK|iK-3?7`@mO^U_mAU<8s&a-sZ59Z&g=KS~%MAw9-HFHUlVMX6{Q&D)2+c<@7?VaQ>vL3Z!b6g+!s3Xw*^Y9{ zul?^c3$ytqqOWzb(HI`_lfPVATAv;r$fZS`d}OB}Uf502UCNIju#d+3Q%;$-JmFLv z;gbq2s1>1{J_x@9sFkPsiBwPA;C*%jP?U8krp!rkL?Ie4cDQSYsw+K6{BY91rsZJ` z4H6C^dlWIkkU>niyBdJZ2sFGf2TAY~e^Nq#G@{&ZnZJ4-fTbY4Jeva%qX951CGsqW zp!~ALxqS*iwd)Ij<7NmMJF`2a`YHi_9c!mG1a1P9v4O-$my| z&fm6Ke7P^26cr^U$KS3ke)5+!lq~U!Rk(m&ahKCTXddcc zcjVL85^u&?NK+uJ+96Dh-{{Nwh|pJNc(u7ns<^J?HnO8Dmuc9{>brotrMff+F%{Up9W+N#K5&B>&6rO(H2Wntn?Dj|Sdawoi z=F_NJ&>YaF-+GmFxB5>e$DQMfnN-IT04%aYTCqa|5(lAHC9#D1DP;dT+d+_jwONE6 zVg|7U*XP*9^Kev2mrzTzvF1#!x${73RkA)?Q;Ax2Cj9<>M+txVJBz4Bw4^7FNM2>7d7u z4;0tH9!RpwmI7yl06U`k)rDjzX16DeZUu(a!8#Vgpdo8o5OTO7KO`pslYBEdNICc? zj=maWG@p@ISQ)=V zPC=o3ACHz;oJW}}B*8dbB#uaMig9ioMmpP{N(=iroMcy91LL)s>=X&r=#{E(!K?B8d!nh=qq(hF-KhEDEdMc8GCu%!cx#7Kz5-I5!d4b@0OA+hbpyz zs2JFwzNLqKmZD=s6ovLUa7~1vVEB=kmL0Blqu>(W*@*IsrLowMI{-gPcLf6GAb4C# zvcg#22gzZHQOS~t8=#EZ2GF|iW@zxGoog|*y{P09ARM`eoK|1Jc>9L@O^8LhJEK?v zCLwCRCZDPlwn0==j!r_d2~s;AW%fFpz&JP<@y0`?Nc|`5K6lIdDMwILKMyP5d-J9! 
zrw=KQE*$rIps8+A)T$y+x^m6X%+qH32J`2kheV}D$2H@^J9nn-b<_CIRES4GMP$e* zvuUiOt=`&-+jMliexhUl!-TvMq~!jMD8#qrN!67FRmjDa_Vrf`wUoKQZzB*B;~*m+ zfC#km5EkMVx(qhPm+9F3INW;`*@K=pX)Td+>jn4^eZEp~T5D9>pQf6kV<8S`?>=I1 zQHrsk^xV?@U^JqTDivF{?C;h+nm1Nw>^p_}bjeaIwqz+D;4wvOREMeWL~JX(@4loH zXiEr>!32T^#pB5>X$6|4NHP8yf(T+}wQoass2u3vlzl#Gm-C&cDX0%99+q<9BP)HO zCZt-_qt=P==%o+Tafefl#b?z?ETlsP1#%?xxs(px)rbBaXSr;YA@DSWlX{fRcK0gh8@C9}P$B zUvlj+&xr|ZBMnT^c*0_)J^(BhQ!+X#6Xx3p9q0OTT?ebmq7jQNI$1d?(HWIu(b$kp z6cs>o5dU}@uo-0o5{8IhL_9zhdTQK3WbdO8x8yASgX}2ina+#?vTCa-t$=4{QtH~@ zyq?4y5M|&okK{}RHuisSErtj(*m?gWL~%b{?1j!|gxw#$Hh{d@@28&Yi1|2)k`ldIY8Tk{J`wGA17^Zy3U_ z9jG2RfG&XD0Fj6FkzO4rs3DK|wR8}Pk#M9;KPe({=C(8}kAEgZ9<498^9FJ=y8iP+ zDvJ;SQVw1&5voWP*sQmQE0DjG5%Lo=(jrN(^?F!tDBWzVV_+FkOuy~Uo{E`sDWA%!H408pKtA@SyVWKowTcDaRHQcR5_BlHX5r)V56I)yQCmr(X zEh48M!61Hg?h9C|yug`wgjNLLb7t2*#6r=Rn=Ts_$>+^whG3x49E&&{D4Xo9D-bz6 zZ3w+(b7?1X3UyVaEd)7~(+t)ZozV<(nsIbDZQ3N82tKe7#t}G$!+rm6xIw8R7pk5> znF$>Rfmpoi4@lI>5(P(I=+L^ISH4#aZ`^Fya+7N@IY(}p&!J*)CB3t=&$sky2kNn$ z_S>9&4)f!HOIrWPu0fYG*SpVHV%JlBM5y-GFuC9PJO+TulK*;h*nTXB^yf&fH)V#8Y%^jBkbtRaYm^*RvA%a}(8vareeI?{d2FI6OKi0Xj0j=;?*BmZuxsaZ1*8& zT$VtTqO5Gmy(fT3B!vkm#Ud7fhwy1yg@arK$mAa-dOb7(Bg!OqrlCas0HjSh&7d_J zDYUdbg_;!-Cku1DmunlM8jsC^j5QC<14wS|_{|?r(_@!-b3!^{14%IVnmT43aLqC9S&Yw_88+qvNy;)AHef` zj;iC`yhk|jDWsTMu;hM7X=1rZT!7PwLLXD-3?$WJg5v%?uz;7QRIF_0pGX6$NU=l; zR3gcOLGkQFbe&ISj)-12OcG$fUO5>eeoF|pXraE};KkBQo9WtcxRjt2(xr|J$nDyE z?I#T(;bO`LP<0t3>-mT;w*N6)US-yK(w%z5M!_27>^`RfL6H}Gph4R;>-Z(}IizYq z*{Z_AVG-yrRIp?|2ku=rI%C2Og8MMNP?y1Q$4p;6{A5Wx0)J|N)Z(H3D1)eF94t7u zw2GAPDDu$0z$1J!{=zblj*zbpJ#&VC@ZClLO9&kXhN~z=H1InJ zEw>mKiy7WWK0Scn?oITrOrf?Iet5E+H;TnCgwFBN-@^M;t4Nr7zN}6nV(Nr{#gtOkhu{ag83S?7v)a8 zZ<#*>tAmE+AG{y-EEp*mbwPjl0r~zp4E!vN-nBEa{P->EZ$@kXQ~tAgq=5Az_6DY< zWAjE*b4_@X0xA##JvAiE+qnFEBej-Na6ADPLIu(UU`Xl*#di_tye>$QtdJ#zZh701 zy*!Z(BN4}rj)K=<2rneZ`S`Y#@-8=;?(KA+5;~C*r`q*qO)h1-Nz*joGnaXd$mCht z(BQ!!_w4DqM?faPy~UrG81wy1MC-U=t-HM+15B8q2if378csT|l+ZLBJY3 
zz;Gh1fQb*_ZYkD?Mt+RsxFmUl(UdBXcJ^J*yZj)ea>ZxGD8}*bc#~W1;8O&faa3nB z@I7tF05*%nnZ#6KVmX(DTreplOs7sR6b?g4kTV<=xXpa=`RD;;C1EY6{=7|>q_V}3 zWD$$vvqJBY>yXP~Z~}-YBhAuM(cSm~bf`KQA<)-T5|LqRw1)orpUJeayjJ{)MIZ}n{651Bu8REZ=)MHX?%)}`PG?}6@nQ5F1; z*C@Bw9#c8PB$N;DM988z^ut!0CvN7w%58T})o^{z@DKdQ^2sp^#L{^RWg@{p7;;@~ z5sG=K%9|SunpOn<8SZ+U&UB%jyvy-#_fufL6OksLfTK_7gOShs+`io+zO@H>iEl^= z9R%Ztw26^hJkQR2s=UTCR)IRmcWy+SWq@oPrXofXix+zE>9+=uvV;R6H*r{7gb;v8 zE<96pIInBwbHIDw+m7flr>cCQ6t3BbcaC&)9gy5ae=KR>^U;Z%e2CL(-wubB*Bt;d zFmXhfHV$_8@lw!0zril$1HPv$vO*{fb$Xz~HvX#pWnAFN6OmCtTC36E4KP{E z6^7Cad<4zVlCY>@yILlT$QkjHw?!IiM#aEnMa5zObaP=lv%u-6U{nA<%YPd6XdG2! zDz$G902bVcdBIy)ok#zds;C8PgoQUDsY>!`WECkPJyL}3%DW^VRm4#F5P&^KaJrx$ z!`-*SE4>3MG5+I5O)k_nT?mA%(0B`Q-qQ|{S~-mImdaOaJI{aNZV$37Z5c=0FHXQXx^R@&b!1p4W?MdXbV?}}W2Zn%K$R+lV?k^}G2-V+ zGW0<}(I~|4#M~sly!)Vd5ZMamW3EgVE4&H=&v3VXW}~g# zf6=v>^v5vGUuYW}Q3CHH)gqx~K9*jSR9kMN#{BSI#~j~~%{XT0WEr3w_WAMuu32(g zI9o`l^|)(o)rwSR zkh}fCB;k$v`tHhMUQ(a#Us>!BtEvh$g{gCw0fuByiPbv-QxA5or)B`)3;EG~3g*rZ z<>s`|R7+_Psxg=JAuhYdNpxPD0kI;K8vm*7)HdROsAJ2+?cz^sZ(6bU-A@P4j4u<1 z6xA2VQc>Q45_%N-#=8>6fyIWk)BwNntyFX(qVH&S1ZsS68E@l<{;ZWB$K!@*vcks*Dkvc3rIGB zOY+9|pO;u(6zcKDnBm-ON9$pZM&m*YE(1$3?Lb~xi@Y=UKLqE2V!mA`M1bwO> zP$?vFK^VY(A0>wmmG_L$O@r#{>fV?mgM_txx3-oRiTV&8?)q{FX$sP5z>C^J;vI=X zCF0L0^9i+afx|^L=9zc}yb@#kE1W92*Z(UTWp6m*T{-DL<~$MlBlreiVO_fBJLUzP z^5c;wA?OiUMJjJG^aJhvelipb*@%}FFFa8eM1eQOC&;BziFyPw9Pseh20Q%Hs!!b~ zat|SLW`a^vD!#9q8}>iS&wdYz;Ym}Kk9}qk+oEZWZ)1%COth%Vgjyp~))Gu7@D88H zRR^mJlzS!l2uNWEKXK-!L&|XUpn?lKlrg~={9_c{6wx#|*7e8s1~>QVhqInt#;_yu%K+L7k+433(V34W^p z(w8FNh0@(#_7UcK)S{?03=r0vcr#>avenLGwJEP(o%^17ftwJ7P7l#f1ERcGqlA-n`^=furq>W9QA-U9Uet%h;v!Gr z$E~olDGP-_u`VGO`|%L~8&U-2Nv-H^0AnY9pZ65pJb|tAphh>V-L@DyNobrevNw@p#+0+n9}-z2^@Q**e_1D>igs-#G82?QES6|__rhp@=`>ahFyNV^tK zyb+4$Vq8WfRwfGPl^aaC9?%02&8_pBF@&@?mBDBQ0kL8e?G?Pk^p&lM5GaT8eW1nKXFUv(wH0qmz zJ1RoUpu0%JImq!YL&O}JA9c*&r7z*@5T_#`MUJUr09BzaX+tj1MxUbY5Me)Qf4tCb zcRFgyUoAQ%o=a7mBMbww6(r~)p%sZ3NiGw{3#Ht4A3~;-l#Emo&h$@ud&n8poY-e$ 
zym)KYCWtr=f4&(2e%7b@it-n_IaIBmruCV7cTnXx7p@sh}uDWva%aFxOY?Bv9@mg!S-F=Pg zKSMPXgCGV(p)e$JaK%<+A^x_OlU{!CSuOiP@zqD((U%i2l3f0-Z&*Uq{;k1_B0icr zM#8Kf=Y?c9WzZn5dPf?I_6QYhueT@1%c>+ya&ip^eajVN3F&>8wx(0 zv!ozPuXOGDgU>wIjMm|98_O5Cvi-K+Q7GDtFD)kE!3~Cr-Oq)LM0fM;fvG&!%HxWv z#MTdw4ti4qR0?+7WyM*Gl~?;ob^q-j+!fOC(vfvLi1(1fDuF!yV?8K;m!voEflT`` zSUdpz1Vfe}sE7LkAfeMZpdiklX)X7t{f(sa?H^~m?{-2XI7*mqlVUe;5o#%1NY zsj>G%zAXx4b-a?+EYV2L*TnyB;m>vcpM2}`GaO|7kL&7rM(fA_aR7af=WvCqu1@~7 zDs675O?m2%4_p}hJ-g|N%4q&9}xG2)EEWU6k%9_mZjbtqXQ(FJ8a|Pe4~$k zI^0ME6%K(wzCCr4Q1*}MhqdzizIvbkkDEP2zDp7p=ilR4^1R5wmAg3t1rnk$j#1NBII}YVs_`-Zj(z3Y%)7cBu$@)Y&@S8 zx~01JW7ZS7@wuj@_ctvws;hb$&~K&Qerf2CeATU`;YV`6>h|+%I{65CJ(qraX-Kp) z&Bpqck=#D3eXY(*!n9ja z_HWxxIn#jnm`5NEVM068k16Yl7>gzRB}y)6d@8UBBkpcdp=));7=Yw`TG*7 z;bPt~*O5NLR~sKSz!+RCzMk41lAWcrIT)R8yRAJRgty!@eN#7}eKL18j#_p#KEt+Ou2#Ot_hj#l z1F4p8OqN}>z4bn4R;$hOEw#7LT3>k`>y~amKlAO~Y9TdflMx2KhXTs4bQL-=Y$Tlk z%7mhEMt7ZRehdV5yq%~R&`bU3DiYSrof zCk>$iuSc;mM`f&;dnN47yGFgVadJ~wA%6))e0mV4vjCc#r-#s9s|o6sVlW>8o804@G>Z{w;97JhAfck%zHj})o8FH;Ma{A07DZ_nc4t`o zj>QE8Y&~(J=Gnotlt-x&#i?@#%1qw2`e&XJ3#-?{^t7k+;k`TMW+ z9j*`f6UMH{d?@psZ(qXB`_}!Y4c=+^wYUX-TPlpc=N8;7Xg5{Loho?C^2^#W&#vsb zX*7#X(} zZpl`A=N^lnby7ysY~h_dqdlWuJ>ymw#DuLGf8kcKhK-3!nyra}{Fj!;Z{K{@{V0qu zsxohSaov}DF$ar^iw-L;N=Y!fedJS&1Z#2fo1F=Bj1+QCRz7WxK4R^>I`J}&YW=B@ z+_|Nj?v$B|mCdZRzx0KTLI;4wsdn?2rx)m@KL+o~XE=-Cye5ae9f`x{ZJP{@Eq&-Kiq^ON;Tc z)|kfmdrj7=97*bs5y^dGl#y+ouOt%RraWt|k>Xk@Zc9vl&T>@s-?Q#eS!dL8M2cg$ zB3pc%$1RB3IJ)SY?6#(TnukS)nY{Po@T!Z|njdW&!i&-J@uB5Yu{eV(-q%`|t&M59 zmG*ku_x5s4r&@X0$U{n#;uf{qTskJ4yPztm+UJg4-LD3vRxgAu zDqLybx++CsvRir}dH+=a>shAv<~{fIxqkc0G5?#-ecwl${3^8H@Y|Gya{3@F8uNnA zQbR%X&fSIh%V6u)t$`$U!k3G(is}lx=U3;|3wVh8MK#%)*eGpPIWJrs)w*k0uY{3i zWu44(q3Tqxbj?*NUglRr{?N=@c4yi(_9l1z2}hTSSQ2F1nbX`^&;LTn=uqgE$b|13 z4OwbSJpx$**Q5(V-=*d1&b9i(r^Ug4bCg7Q9D(`osWSd2VbB zQz`H6%R2^bW9aD}RX-4PW%idi`9a^;U4bUc>SAtx%Z;LErqWw~sw>#l%Y0viOZv3? 
z=Qr(X>M8`R)D66QjxNKs=u~i~UdNbyuH!++u5jvu`{yohNx5RXv1HslgSu1`Bff`OFD_^*uYf`^|IH<_kCW&2ZF%#n z<#DHbwZezr*dMH-)lP1dXxvpZYwl{F#;rzw%=qinL^me}m|5*gNIh=0_5H;DBqyO? z4oZj>tPIiZ>3<7}0jO8z0f_uYdjj~r`kld9qb9Rw_oAo7G@QM}^7-vxsHW@xBJVB3 zvfR3MVb-!0!9o!MgKh)?Z&F32yQQQ=xs|bSd2-aZ4lJ>@h*t zyWVF%d++xiN zhdw4(?^pSP^w>2|aWWCHZ~iwr|EidxnB3qHz@s{FPa5w}APck~|B(9js=sJPMEx=! zXz5_Yg`(w{*^mqr;7N!k%9gp*U%vOIZdjjZFJx;Bm6wFuSKiK7Yy0Z(%I1`$G8*7t zi6Zgr#NO*p=afVs5R`(tMpiX#X(!Y$N24FYEj=X+B_CUU__gQtV;hbHv1m3Jsp5g# z0yBtOK;4dD*FDWDBnb<2+F&S`P7R6gfmCi>w8FG{Y$5EW0?8j-xQm30B&#pntePQ8 z>)e(x(R)i>fAK)g1uy)(RlVju+)CsOyzM8!bGQGv^$oQ8mp>gK+*1k!(l27@S5_D8 zsDswJ72o`)1Bl|H;V==0r}ry6cnaTGJJCX;qvpAzmK0j2Qm^Y3rp=SUBDqD$o>c~Umz;r*HCcXO z>_}-7Nsnj$Y*=U zBzJ+#z_LdE<&1XKmotqM%OXfiyc2(wY7?i&Hj1|{5nxncO5#y>y@8zBvwWBS8Y#BlRF1xwuc7rd4^|6 zF6j4j6?}e}XOR~rkT_ATXup)>AImkhET>|$^6Z6`PlK}5AhZ%|o>$?b9~p+dOn zG`%jK6JJ#}si5d?>SZZl`edFbdm z!3?WJ9Cu4lpzVWtDPx&L4&lPBNIhXTw#%0;`kFCz-XeOI>6+;LIWk#7->!( zgtn7y%r9}xb{U%Rtrl7J+Nb!k&uZqSk_0BD4d={S^yUsXNPZ9~VQ(g$l_yMkGcxs2 zfJ4NHFaKm1nm?gj>I5%aK|`(4y6ci6)XdKewSSI6nn6a5wn1~fy*yk)SVAvDZ=%wtRw z;oDePLbZW(svRjbEK+s(YBC1gx<$M)Iw68r@B2Ln{D9+}o%Q-lG{5-E$A0^E?W()4 zBy2+%NZ>#ty#8$I?rWa@s(vTi^x*KdT9wB`=HrhG&z&6|?KdK~3r%c#JJd9&FQIL-P(LJPoh=+t%02EhJmxQp^=Yfhq#H!((_Ob^;c@`I_UDGw2X6)!nYer>xwewBIWE=fNCpEWi^DH{U zUv-XKIkK!alQZ-#*7WHJk9`<+xZ&BUHB~T}Gcg^>SV?ZFIy&6IE~fhObgXB}z$E8v zsaJLO#I$M17G(uyJy>P>`$~?#wcS{UX^4bCu&pQ@gf_vEy3TxRnysla>E}AKnOr~A z<%hBg=oBTI;8u%Vax!_9iYn6J+lg|`aO)KfF*?<4?Phkc=A}RHxBihm^NciJ&|q&f zwoe8E{E%uIV%zK_(CB0~EB)@TUWV^1n3kp53>#*pSpA4|&D>Y#Os|zF$*Xd_P8{LH z$!tCcN-F)AZq*EDh+B+bSc?UNgvOt70RHx6W$p8)H2{KV}v7fZ2Z>T7%|5~Fk zpzE=lcq3#1uWMzz_~=jWgxUq~{DNK5#T#kE5`Dai!iyg21cwUN?dMF?Jo|R}w3olz z_yvxv`V|ej4@E!^TD>rE)v*yeI^Y0NaHnR!6SsC2XeJAgk~HUQvsqo+N-7SO zr7rQJ&2oG>QFeCfvt$IT)tcD}mTKo)ee++$ghQ1YSrKcjVd+S?^5_m%UjBgikMR}WZK6&XL+_(rE6IN* zkRb9zB-7zES7&-yp1_<$>x|=F4Tg)#%oi@H==qOXzLV{6#vIEzm?HUD?_!8=J#8tc zZtcwn9SzDJRAOOQM;3C6R>?$Oee{@Y5^apI*CjXOBI7pTNyrxCR3J>UUY{~=e|4Yp 
zRYAW2%;3VrT`J(uY9lxH+*#1ax%Md`}m*ERu{>B47>~ zX@8^Iyd7y_{7AgjLhp=#Z|%Cky}MIA^KVT@Tr7ORuSRhT(+Ht6T$jmj|c9tW>ow{CrZsc`cf>6t}g+{lszE%124Tv53(* z?QEq-HH)>W09SD>rZeb|;!sw?TWV68N5q1O?)M$bdqK~Zm}-UPoziuz8VRRqsaLJA z@>b=E&Fl#>8?{t>WSpW^V2DpqPaLe~m72yRCwL&m&XS*nLUp$o9wkO z)VQF_R_X>g88CHGF&x7D2IYWGiry3CR}PEERishWSWi^MlXC;ikp zf1TTR$}1^Ef6gZl_JuI7G@q&1fh`c0U!Z<{Vz#&`y)XUEl2qxypjfx_G!#;7!KZM# z9?J_s&o1sZzDfK@ay~RZd$+|x{bK@sImtNN>F+-69}$AIXf(wtp)M+=l{sY_6h;aW;C50S8%fjOWD|QNYnIY;8!>ji zZ%pY)<0ZImxpSIS>sUZzb`fbpeMTmqZ>r8)DkB2}Gedq$zTFZ=>Ytu{rKSTV1emIbeewh~a)0WaZskT5hu;cUOJ&XXfaW<-3LVJg!;Nk_5#% zG7Lr+T~k$OKEfJkxe&|9h9tfF^c&eUTxeQ~?&r#P)&$$HFeFl+wrQjE>~(xXL7!h7 z^Q?Z8*VIo-!H)xX=V+i4nqx;2?m$MEgodC3%#2cslx;0|q1kCPoR5Z?kvKc@EW5eS zxEo?Re#WoykWjKumt?JWmRjgSA@6M*D;YN~KX&be=hD_(R-a^S)%{h)v7u=(GwQgA zj!}ww-li0p{LRgZ1dQ>PlW3#F)elLEmwqfpo^ z!`WvGhlCZxm4kT82HpifY;R)w2{|aYQCTAQI~fw`8C&-U&lX8rBOvi1d5{t&R6S1J za3&^Xl7DD1AG_4-;JZd;YHU5nc<5sH2@@Qj$>H| zQ(TmDQZ(EdnmZqHFKxzAqK(vW#?t=TTAlL6x#8}?5hf3SMG`6HE;QZ<%JE2&dBWsQ zeA|?|Af30#DLw6+e1}P?Vnqxbd}@$gt11M$GQUT>5Huqzu`nYhKS0$m0 zDEJ4AU9_+&;>vKP{$5hR)-!&u^!&x+g}hHSs?L08yQo=3@^-|6Ly(@X-9PfWf$RDc zqj5@Kt&)YV#!;srwY09H3^l>_6Dh$=X;f3Aj=|c`y#I1ZN%im_x%(#2qHM`DR7o7D z+u!10v$YW_aVj3C4;#6AQIsZRRqL`#>0G)T74XxKm#pZePo49_he;-8N`w*POwZb#yX@LZUM; zY20PqT94%pK3i%sJOvWR3(T=})Tzml$=|w1t|g{gSQQds^R_G^|CkRKj2|7hn`kII z(zmVdr~1><;1sZOkWeSIq#uAhC^97Wtayip1c5HD)cAH*>BT-sR|$mSnyOw*a(LR% zs_le!QC~Xkkjd~ZNwERPw`Sun>`aqA<=d!)HpWsYSTy4zmopY$El(?DbP~L7xNe&8 zs&guaqS1bAs~uq4CjD$Z#-ONx+Z?lg71sG_N6j&cWF{K4PFKsY*hWkahDV= z85N_hAie4I2b25>wTlgZt(kjm>SYRgU+LHQ=@3F3THmT)Rg`(da%JthmO@_@2fpC@ z$v&KSn7!UkyI5aQSa`FTsnQdNk>KN<>r~8Q=g@WNR=+I6Rh^ri?X944=Z{ zaM#t*X$r&-@?Qo~x2{HC`J?*<$>-#=YARPhf37P&=5|#CsR8BSzRRHSu93FsR-SrQ zso!-V#*Z|(IQ_h=j9JGNZqteB^CVGw2hM^jNs@Wh?699JqaNThqS#Xu!e=KzxUm`L zOesZxJK_=+9lqm2pM(tOkPb;9c)B9(8NP)Yial2RgT3SXlay#D&E=X4a6w+2PCjL{ z!eGx9Ji~QP`9*9ja04kRf6lMzVWhbGD? 
z2Kvj7q3H`E7Wqgy4GxlI=m7%%?&RM+dxY|AJ&W`H3S$A*#(RYnB=)r5L2sAnxA~VZz_g+ndk_=e zm9I#81Wlwu9~g~j!UQMqEfRpYAr5oCC#@a!?|~QqtZaKg8w~fxLZ?n0xW`E61`#<> z2WRLZn%hU06-hw=t@O|y#{y<{4m8k){QcBGK=R>>N;sS+Jn)YM zd+$wey@OWjRmh7x!@{Bk*KAFE;#3#lh*9`6Afep|l-HbK^bw!0JPU zEugq305|xY?4$iJaryU*fcv!s?H7oq19Rx#U^WO1+3iv6kN+NcF#2^#-R{_dy$bm5 z-a>Z`h%<0~U}g;(b!|m2&D-7Nu`J`-6 z7620T08I)7l4$^k)f}YP1}N(UFa)uqr0LN>5k#YFNPZKR6`Htc2c8y;k0paan+9%Q z2wmj2>Dzn?%o)m{VPIHKLE+(`MT}JJQ70PYDI7rV3h3kl6G-f~`;A%gPY+-SP6zzg z?980m_D9I5{&r?#*#VEPALx<+;c#3nbO=d}!+-*sM!?Yg90XVSDBphI!GUyvI3k~$ z<_{kRgFx`%Y=5te^uS<>0QH0s3;6c!iVTkV4~+#d896{o6p@@@+SBelg>Vb-)ll>OH(m{rZ<7JP zHSWPeZf_mn1**jxOaem-jkzOk;uMhmo=PU&QcP983Wo>q#gG;nv&k=cEFi1EvN|7) zzyIk9m9Oso1q}XxlZ6B=4R^NIk%}6pX&*9MNJ5Mn*a(Gob7^34cPk+@1!;U^+rYg% zZwB>v!G0neyrlO}k!N(G> zRYU%L<8;uldCgnXW;=XGKZW>qjw42IC91|$-A6r{mXbR^-yjNnRKNO@AmyIVm6UpZ zu|kRSo)<8Q57Xl+v};aJZ#;NlY^yTuvxPf#CU7QvVPnd`uEvnzQ)g=+YpOd_*ew;-_Wrq5SDW6kC*at4w3 zcKXF+D^Zw>nmDvJaP(Mk+J?ge*;Y0U->uPy88!F?la3X`epDau1}}ZS-CY`a|3M+X z5PumLuv&7gMH(8Okgw|=R?*W~UcS*{!gkzT5>tLBYi4m}%Am`b?fCrm#7g+W%2cRt z6c6*AJDCx3qGAEZ=a=PwD8QHUKTQ#ZU!*>+fD5b)X7-}kO^;)u+;qGv?XXi8Nnqqh z+L1D_IK!zjy}rAXeMo5XBUcDUL?vw%H@!FlpsK~c-t4^&S zYsjZJwheBQX7()XE@!Jg&vNGbFk!&fuoWSP?txP~X3Q%+ArBY0u)f@|h50eNT3#<; zx6s%>7r#3=5K(%)eB7e^s2(Nj)2gcg^039wov(LBl!D?@Hx54 zft4r<@k58$3e5Sum{#DSgF%?V-knl8W!iufR(CVfx0j2*HV-f#&I-XlvfIIWq0o{r zuPk!h3AY)j#a2(-pU`K)$IzP~nx>z&UDAJ5DCz{mX1BcM&Y+KnmO!rhtzQ;W`Lt=J z8;0u6%k6t2YB2PUHXv$-z~Bh7J%Ye#2!=4+Pv)Z&1exR>J&LD9p4Y=au?ybb_zAtn zFx*qI05eNT22b*vcz26iO(N{SA1h)Bo-AIE7nkH;Vl4iCd~9{jOl~|!&x=tkMC0SY z2bx^fUDuIlBWrrbw(%z+&58`PxO}{TVeE#9 zuAFUU=H>Q>Qo(Y|R?oMVE52SbuG^9fSDY5oi2bf3XQuhe(It4igqe9fr8)D@^+K&xOwIC@iIS6BJv4tt`ngv!IF>+0Lxb_C4= zXN#-T80V@kDYTWANQBOXXx2GSDJE#MGMmt%gY%oq$*YU$l6gOS&#ra{Rf>z*7Vh{n^iDoZ zFY5|y-!up#NgEilzQp4>nD%VwN?7LjoGW!yKtg!3Yo^NCnWFUvVVUoFxN2bh(;B}3 z>}kGJOBmNNT8-Bj&1ob|Uhwo}uv;?^V{85VJel1nxb=FrA!*9<#-6Ug9F5%%%)Q%1 
za?2;2nI7=`xE;opzQwz|W*pyoJvMJH*_%M!q%J1cSbFI^E*|e+e%ud74EQ14@Vcd2EzIXpJ9RRxvvw2xmG(0n$9tw-ZwR_SCeR@X{KeBxO$NG zr8AV-luDV*v}<}^;>w~k${Ro7K9f}@a1(opIClAfM|Yr#WQ26yy~{VrDwzilXYuV#3MnRSNV&yDK8#1fg{mg+27 z>8D!ubOpb-H@*QI@Zr%3eoI*FWIy_qs#WuH=!%TTI*<3y_JwPX8hu+C_U#G`U0i5$ z=-9=O39cyg2U-Qq=x1&S#3z6DO_I#>=bS!W)O?x=yDOz3*>t-4L5_l=nr2w0U$$oZ z55=s#m}`bRLJK_MGxFQ5r~T`S$85vcb`NZx%lWeIxHeFl_e0*=DlD_U zOd~bg8pn^r9XV{&>35n_kDo>=u`+zsnz}8|`$1gKk5(af^}?>)45tDom#uF}8(3@V z>)+BEc5^zzlZ{BZDdm!g4yb?rHPtv2v#_$V$4o}x4$OalMT0ny8E1l9dDY&;uPA3(my(pHPttEbewcD}Z^vw%vxS^1T9w!CwI z>$@gy6-}&U8(p^1NOqj?Q^{P5j=Zikmxybf+BYZkFIUi&T?^@;c z*S9X$ejg>Js6D!nVy>76q5IgNT@y|-J#xG2=yA)^*Pm`33Qp5h{vnG`Bk>3K zBZ6Ey9+L&7wAzBWyb}`Vi#7(v)e@$}TIjOM)GsYJHdS8{scyVYf1}sMr-nmibY?WU zFF|7E`dZkZYOjW0&8xEZf?hDK8#-M7T)m$#l*M+}o-)P)ea0`(>Ej_yL1jY@y=xmU;pWmTT+lbSx}VX)Y(R(31t$ zB^vcW(!;2G zdPhE%wuFJlre*l0PSdN$)0X&4JSE|-3--huxp zU5ui5)u@46Q55h^N)*<(*QD8e%}jF>Q+5SXxIxxXg;5J;1$yfphx7L9UFuTl zf8tg>xx}f7*1P5iH7BN2y7{SaX}H)+J4G`=@0;CsOXhAie$#I)=1ssI4Q<3#t*w(f zajLJWcB?)o+U#mR9q)uoPk`rd_B5X!{}Z<7y5Mh%#MuWVLxZY!S)xZncS38SdK8db9bFK{Ile3l18I zb36`5CzD%_-Ac1$6*{M?COjXYySAZd+Qbz$bM~N`?%G;S;wY)i1aTi5PHAV5Bxr|J zW@ETCMe(4a?KV_PLMIJ*;7kg`oG1w0#`7&xy#wOej@^>gUh? 
z*aZ+-lUEVnt8m-S1&K)@yb%vCu@dRfW_726RKOJAS)A1i3k%txr(|z$p9Hc<uSEu${EE%|+4z^Yin` z1bgxxZic>0m-b%Z)`vxeY=QbmM;Q)>6T?FC|A@&EBx7Oxf=dJowh3siH8qv7C>PB< zx3$S5Kmex4kw8BjuJNej3NoK>fw^_hWnX=xO`H-?HTdQBfQE+$W1V|TdU$QQXC#GcL%#W8JHv56RL2(pdk$tb4BHQ*CJZ+>aWVZjT^@FI{|Zv;1{5y z1C1XMJ@RyL$Uu-9pi>I`H}~F)`Sc^H{jvDHAMx-EFQ3`hve^IGt=E5Fv*6c%_xb#~ zhWkGc+5Y;>zy3Rq;@8{n|C~?u>#pwq_-RI1_}2sX{xDV2IeP{v?IUMJG%XABVdB&Q z;;_$p03FTkuvl z-D(1GVw2W7Nbj5j^@Q;7aM4O-BA?3wvlFMJ4}3F|KD~5F{+#m>qT6M+?|n6smf?t- zgFazxeSLka*4_)dJ$S`*iLxJ{uLmC=K4oKLL*i&iLj|;aWi&J*k&Yk;3)i$>-N$_Q zo+SPLA02;OIYwM1IU~|xk+~e)B$}-OI z5&rcPwU^9&8UPEctDPWenq$yPO?3V| zJqY#z_-#m3O!4~-;7>zek}yBNh=>m$JkT2yr5PL%p>E29FvFGE{uH3{BO?T$kL82X z8ImqS*m5CY`J-cFV;x=pScHi)hQ@cwVJ9Pd%Y5g%qAZc;zurK$vsR zl94e5)XI-XLC<3QTFHJd(2&#YcVHhfEhs2JP7Oe>CIVG!5cZ?x<5LF>l~N>^1cwu# z*$r`=o+mGjf8?d4{6cnh?=_^S{C>rL!-xC-Kd7Nj5ml6=S- z14LV`lNwa-1hK0*F5bo>0U|ibk!U57|KlUNwzr@{ z4*ptCw?1ToDkd4Y2?3#236#pFeV|mG!N!7qo084ry$7qwc4h9b^fr3LexOW(AwB}% zkU+D6;_f(GntvCcpVuAQZ!R$1|6Lj@u(7jKA#;25RDE6D{5De%8EG7ha+su(00(lU zS)CLC4IK>QAm|0fyni1Q5HKR^7%9D)?VBKo*B4LiUC<(4y~s!Q=MX0*{w>#GK`*H% z+y_6ISXu%R^Vw8W_=5F8H`US6bMUVT`;k1Qe7XDYPY>U3^I6NU8Xixi1+?oT4Cg?1 z4$Tk;`uijQl*iB)LmDZN*DL|69GRTX9^0E-c6;DK^y|E4eh9YcoUUeoGa`-6H*bj1 zC^o21gd_b~Ye!J;wo=x}w7jFNtX%p0!!;q_cJIN_Kf-0sR;d|y$Q*DDBcc_QAbnol zkShDE=RLL0Z8Gf~<~9N?t*em)pXq{B$Zl)@%6+Bl4<0@&iiN@dk@+$R=s^g|pAveF%?K6+msZgdpKfDC2-Z|0Dk>^L zhI{wNt>@*hs3n~OikC82=p|5lR*Ti~K%;V^6Uls0(p|X{-D0x00;GHYUZz11lo>$l z1r*6!puc2n=LBjT*jQE5f;idqtp9YB{G0b$+t@&AaM)PX83f~>T=FnBy;rE^21?3P z$th`>!Obw|2j>t{=a5iE!idmM(OM+|k(Umz@6jdR6Y*W&*wBcz+uK678pmdSClGxz zmv`4ZkzXbfu>`FHdC)Zqf*TA6L<*?8IDklnaZ~euoH})&NE>}YE16F|O3Tie^n z`33cPq4@}wH%L|DL6F&=CKepDzZSx+4p0h+UY7jEo-(!Jk9j>7pPEdx^cV2R_^*PsAVs11#Jr-05Nt$!qR zh=g2_8YqZM;DS5Cs1cFvbC-KagGpNo2xYQ6L z1FJ7!FSRH0Go3nClQj#{G#Z52@2yAB7X*8FYTe*ghB}yr*I0P>xEp9ft-<*^S zDv6x^@#}Xe=oPLDg{Nm)-F4x&y8BlTztGM4ZcI!n#uM{hDo4v_$m&bf8;Yi%UvMv5 z7EfAz!8Dx4sXg{FUs=omaHitp++N7W!!xz4rJ>Y)^hgv7JO?sz7P{t|8uR3mzmLwh 
z9-dPTi_JV_y`eObA2+p=n!?QLMn^+}+?CYcZS8EE)D`C%`|C}#oakWRUG+*~cX8q7 zF@EDM%OJEyMN{we!%yxySXQJK7tLmx2(Zh!L^*wjPElRa3@1^Jv4Pxp81~v68Bch! zCa?qgJTK?cv6Bg;ahsLRSmUpQr}Rg$3t?pz0I+Zg@MBMqz#YCm@FxN z1JyTS47&!c;Xd!89qov$=S=0svur;&b)?x8o|I}ljZ2v(l_5PJA8_DEPvCW-tt8II z(_ylyDtLHY?7)tYZ?d%Yp*3pK@>!EJc|*~+jrk750PGtI(YA3+3?oq}g=jMu+^e=K zOw5a-@uT_zmmM8^SP96mNo_X!>)Jd-%%$D~lz&99a7vJo6COS4M+vGjnRVL#*oD#Y zMWe8g^PoD!Frwtbdc!*OFG8O+Pxo$?l8dJvmc>3+dG7(rtejnYHE*#jeOS5=Y*~JRM6X0~f~8MQk5g&KIdJFu zHw9|BF`fCJ8M|jX6oOuV{uUCFmOQ=8I3YK-9CjjUdf8ei3Vw#)EvuJPUtwZ+)0XIj zIwK+Gw7-)oF!D*n7wT*c+6zJ)CO+mb_Wwhn6H`t!woIo#ehF?KgeAIy$e?v6wIHjz z6JO5luWGKs(auL85Bi=Gk+4u+ZwJD?|I?B_d*;6c8t%WQ+gsefLJj{jXsG^gLBn4! zK$3zD^LA+ZF9HtJJ;?k9$~6wx9Veg{C4wv%IxZ2B4R|_+pmBX+y?_6u_kNcyY6G-u zdfNDdX4yd6)>bC`HK7OG99v=BJrE{5K>U^-6!4IA43fD+StU?^W{>aVnR~C{CU)nw zgfKC%{(wq8yR;6Fp{S@R{o)g3Lkseoz~TC^@cAF`g85(Uj@(fu&sgSKR&Rn@sfa=k_CDY;0OeR52*6B%3(qX! zuX^g)@7iN(bbK6%#=X615d%4I9F)z8Ae)S|pWBn*B{D8Vq6^Lql zgZ()LHN)b1fkU%?p`l67p8W|jT}`d;0#gzbg|P?}8y1!h_%~=I;G@$Zo5;`4A4yO# z^Dp(hyn!lJ-G=E9pvWP4d3gX7hMU&Q5nKpgvOen8>ol@hh=Sfk46l?YUVfuMF(XvE zdO}U-9Z&@y+>vDOdt3^4VJ{-?%&M|?5Ojgdr<1cYgId{mBO8bOty^xOPB=5c8jg*O zWEMdnr~ZN4ukZZAZkNd94{jgcgL2ygOTm%%y_*}Jl;^N56#ZDr_03HlUX6uFsYu4% zcZEE?^uD#akoTG8Na^~=nx{)kc}hzL`Hzz!S+sXAcNmft{2OBd7g<^nRC&VdjDgN)VoUDcJLI?h4i{jD^uH|J z`*Q#Hf!g%1y`eR?9&n@Gds*p}|EGmK%<+PSdr@FiR65erh2pwH2dswcnwmPGokT=V z?&3eVj~;XQf_Oni>+St3lrLv$ng&vB$N&!H#}dKDv96{DIr_CjHy|!PJ{h!aI~D`5 z*d=640)8Q&6Ps(mKtm%0uJ5hT0zj@1$hr!!nO4vVYX|dD^fXX#RM^^YL?rI~=3=&S zySTV)AZct6#Y2ENa%8`M-vbT)K>X>ZqzI$|H&x_80_u-&2snYM#t%peQBNFokFfju zn)bl2T7Lga;N|^ym+EzUQls%r(4j-1mk@%)-oaH0IyqB8 zN*X_{l@rI|4v;osr~?p$6iGnX-j%A7g`yt30WH{%!<|H4YoOdWuc_BKL*)vOpKkK? 
z@mUd09qTT(&qBT)phcMkvSCnN9zS+W2B38$nh!y!h#S1u+Q9K5rPs7b0zMl=P6g<< zAzenexv&}ti_fZw52VBIfYAk9vR|==A)G|Inb0V{R5ex`vE6y3&ej?em`U+4qwL%S$IRmpa8C-fzShi zSWeHup#W+Lod6#*X=-X3MF3YWa!v;Z9h#rkFHV`jXxO7wc1nbTa zGxhT}(ZCVtqHq!(^78U(2RB3{bAvPs`G_wXzJ<3(+K|XHCqeKwM;=^hzU4N#!o?<@ zWuWLA0xvs~;8?pFEKAse{6y!@)%1oriQUA3<=I!0y#9D}j-{}0$t;pWiUE0+JRoDp zq|1?;R9+70&=Qc|jD`-iYCYg($li#I->K`l{~xq63t_rVE((eR7qTzN*%J!d%?*Hh zA()&fo{ILLY8t8+NT!hfDENq@7B^(ZDgn08bWC&E-k^gU1K>(A^taqatN&~} zh!&`Ex0_|q&PhG!sxzeJ-vJ5tpVD^|lfYkc&Rm3h$O%%7jo!hXc<8JuocF419fmGE zHXxuK?BlVgr47L38kW>oP{19Ykd}Upnq)U`cEvLSHVXg>`KzP1Ou*8Jz39k0Y#aDo zLN`nji)=o_bdVM?x@^6?M}uM!zKPV$DN)XHflfQxRtx`61sp8N5K3fbz{&9P#ful< z$tcrO@P!<2wtDS(mauUC7Z}>O57Ds=86-hit#59Y`s0s35QeG&9#bGtpWMd@@{8G^ zCkqaSF~_gnw~-EYkyBPq1iT8lYHa}TA9Rds=)FlJ<-fVGgV+gJu)2A2hkUGSz(D>| zP+U^d4lxGqdiwGA9tjGugUGZ6Vptoxz|bFocr^hlr33OyzIlQny4f`+USRU8+q5e{ znqW_K*LyPh+V&$j6p+1I4(vK4=(@Y!?1~l#8W19Xas-b#7xgre@-cKL8KC||rmg6b zfbGU3yII&n=+Vei8wrtDYRiCpH1dLUhQSj~Tzo=8y)K&pG+MB+(D;9_dd$`~{UJCR zie#$Lm-14n+LGTi|1`nvMsxk})5!$gtuK|86OaZX<@!{eH0ZBy4DN1gh7JCZgoDc` z1&GXH8Rsut(C2Fo)qJe2s;o9i>^gt*Ku92r2#{d$Mec)^tv6uV7g3+{^tp4o!fcRp zd4n#i#>dwnH%y|Tp-}}hD=D;wPafRclu%R*&WQ{O3Hb?^!10R16VZDcjWF+_1;(M5 zV9~SVSFnQr=0i2`M~FOnnOxq#Gz(<8M_hMsuHG=A(V+}ds;4hsHWOw83igk8c2AQP z{ar?;ri`FJ0Rrwzte{$G=max1_d$sJ`om9n&Be!_+|?7ftgo+s2@>`y6mQ*SDq87# z%#c?=MRj%c15_{Gx3I`KGVlkUkZ4sr15W7Pz3w1rGu3-gR`m9Q9!QVTYo6KsikIHr zHz+SccFhKE7ldJpFb88O%m!Ve*P#1601l5l-oQYp&}F+w9Z>rOv|vBr9>E(!_+&=_ zqA=TG0@BAYA=0psz$@fKdL3~T1hGHCw(#b*26P}yh1p%K!w#USuOUjn&yaG4WvqZZlydVW zDDJM-K3V!E^t!zKj?p?SA8$DM;6X$TifUV2C*{wg%}+_GrVX3ExV;P}z&kdlz#fb+=v*jZ||QDK<~`RqY(11AP6z#FI|LP*aZXz z!R6-l)!H|Joypm_y?ThxT}6CX!us8j7_2w+jxJUEUd|)$1tXB*u`#{1&CTA|d6`&W zQC&T~i%FEIlGHSddqNTA+Q&JO0e0@rzk!mtr zN60~EdIVrFLMB0ytiqV{QfO$XWaSdL?~q`<;ppj$@9Xw_m!g!O>v&m;_veeokgsDG zp>KJ$_Ay9RSZpjABuxF0ru`pR74GO7#P*rVKBvV-56Th9&$5iB?hxKRS;0zhncsM6 zrnl3yn2M5;7+fY)^#%!~-?Q#Wli8nGOUIVfKo`||ZC(p9c4c2AkdNFVVb4Naf-zzB 
zuHcN_I~TbgNG_A0YAj2>BgiqHI5^!`AO~k2Qkd;uhl#`X-R;3$+-iR4HY&f7B?L{mAcultS$C;(5u(f#S$VyB1&%jO zd?yp{15N|YcUIPqeT8r#f+-Ol#I_o|Y><^B(0PQBNH!>k8}KR0%g2H#xDl*~y1Ked zm;2orI-ZTL>Zo@I79cS(*YqAeLSPR|@%%HS+6r&|0HpLUr0yaLZBC~~05Ayn0STfG013T!6elhZU>xtQlMAg z31gIS9&vl7A5$}ggW%kmGY?URQUvSk>k9!$pQ*XISqb#`;q-nF5hf8}l*r?!+oe^u ztNh`ef;I`6)JRB|!yx(kf$kv&*|;DZa^xg6<#|o5Cp^Lmzd{;DY``K`Ug6z45?FlT zQl<$8MlpcBfMEf($b$@RcPJIo^1mPV{qkxut{9^a2Q091P){})Y6l&?=5Fnx)2s+M%z$9J<@FK8#7TWO)qydzO zN(fmKYQm$XVm@>W861Id59%d;QNe79#9p1aGJk$64IFjD3MdR z2A$-vFk&kJg^$rX$}d2A%mCsYIQjU3j{=1V)GfR}ZijAz#YWmU*oiPg_`a`C4XqnE zPBV}SvLn5`JcZAOgtGD*D1eNcRKe2|pc-`yv$|d*203G+VP7a#Xn|BcSG?$h5(P4-j88`3N-TN;pm41 zn-Y>w*WJxQ5^!?IuP|4|N3L*x9yoxUImU3f`p(YI*>FUp0+&ie-b7W}+S-%668<3( z@7tdi`wFgUfhPg7M1WrjfJ;qyYU3G0@!;m6A#?rxJ(eQ?l2I zXcT*0%q_?+ z+rfS@5jsH=@aI>EJh&BNH!t_=N7WvPmL7qM*aC3>EjUCJ&Yv|;dbstO2(B6u8UcbV zeFf_cgBHrE$OI~3ZZ`_R_aVq0RJ5C>4U&3lS0nO%QF5-tg#iPKMiNUH~Rnn5+Y1mwvHFa}uw$4P7TN_KSgEhs|>G!sU-l24pCq3&Y= zhVeZ`cIiO4`_QWxBrGMR1bf{EjsnE1g_bgN9^sQSI;Rqyot&KNNR2|&Z9^&PfYb33sCvZ4zVcl zsF0CfXyYRU7o$u+bqvlF)V8*P32Zp75Wb9!1rHjS_eGo^WOuu~1rZ$;NC5oWt#MPS zmc+)zVR*q6)*6|S*p2}{jLas0>IlcJ(IVaoU{+{07jz=hA;B{Q>>+Iz506j>|IZvL zxV+YIBEhlOFslqO2OKaq5YUm&6)chtog)OKAE^%@x|ISEvvdlooKyYGkMUTJzC@Pd zPyis;EJi8(gn$1HaZ_LdgSfeng(^ah3bzBwsy7{J>d4U^QJ&Grb}G)%(UAelHpuf* z5tIVoCJ{g{1cM_BJ0RdRjBlMmF7jYn5_eV5BN2w^JE$zs0QR2WJqN%ZHn0~V!ue?t zBh(G%r4qk?*M@O&vGn4FR}x_`7T*R{5`tjD!$|?s=$o~vC@+75ll}nJP7e-ItUy$$ z9eVS~QZ)z;MKCY@0Eff_0NPL`4JF?dXDG9gH!^}KP-6iSM&#^_p2Y`FkdZEzZoU4p z_Pm!kAGJ_Sxw^Gy zMtY3WIRGrf2W;d$BwhD`hh{NabJ`Qv{FbBs2Uxrf&&`>^Auvb`WZj<-EQt|;50XyE zf7K(v`>f(Ipgvj|%G26WFo|#-EGFN>F>xQ%M~6p7_Oiu%z;nETr1w5Cu_mnqLYQhV z-ISEd+y(O^RabOQ<#@jTSqz&?5c>@V|2=I`m_4dL}xETWVS_+&y zk#K-@#YwIoFvONO>v%ZO{XftC05T1uK>OfiU^DQqrE{RRwFVzt1288+jm?t*sLRqme*Bn>f3ltUfAe&L zW5+_?K-7J(TQ+*)h7ce&Yv1t|ftiiq{Ck7@%Iw(0CtLF-=ox�lulE?uKVvl>|KjpvVaezl0fs zcnGb@(b1oP+Rbrh^;YzwA^w+>bwFkQ^|BnbfM3T#$>~`Q1%!I#H#}Ms02r{)?z$G2 
zkbqJ~w8aqX6Lw;4Z&-Te(^ucgkIK9D9g*pO1JyP!**=JlCmjJt1T1>IZa6XS`;aCjCep$` zA(}h%tb|L~J2NEu;+>=j^W_{BK}<-C8lad53}3(s7!j+HS) zbC_I1Z`QwC0jF4F>a;XN+9&lknv_?z%xY|SXdhX*tWxVM`IE|L=2%c>CU|8ys;z$d@U3Xp{w$%oIu98&{|3b zc&A|1^RWB@I2Hpb;ezxA7I8BJ7|SGreBvc|01eJ$_Ah~&94Q@l=zt$z1SK6ab5rBo z@x%vx2cEw5+PuR1*ROUB-lsz#Qvx?EB^xlXe#f~IERt!iaY$tZfCeF!h>Z`Jm61J6 z z3rP5I@F1}ooAlxJMV!(6&}TH5;jr}M-OHCRUu~Pa8G`GW9yS)R(QfqB0Mkl_yAO>S zdM+*{I2*7#POQ+q;ek4-Zk$8Z=jWBfIe(z-_?`c#ly?0-YXrU*N~$5nUjD<|dR9vu z_Dx#JN)Ouw<4SO$7-OD$q7Dwn?qx1eMn%%f252FE|cA|D=y2qdn+yNVMcx4#RN z@Dh}>I39Xf6*v5sRwu6i-!C^_RrNA>9JqHaXK(O)p$@!GN z64ojvpD?EI_vjmZH6RZlGqvo;->#u71f^9iEiEWlgHkc2Z$n^rE30&8!w7h%KOk)d zZ@NP7SvZYTab(aSLEevGd&u!*{$auhJ8X-wVAbh5rF0D@5fa`fANxa^uzPlc;Q!jO z==OGU>&R;=a;N#*NEe8}SER{TY@+Y^8$$lITR=QVzL-9wm>9rbe?UR52tT4xjSp%C zvXC8og>noy+wUPy;{gTob~3n68zT@lJ^o&Q_t5$Svh|)j7zC7#%oB*S4%>=DzbTxp znQr~N5_jHoHYFegsSPZ?WXA{zLn^g17z7|wr=;WN?$Ij2F2Kpgv;E+f4Zt$2BC^mJ zM4qmw9r6}q6^~RL4S|2li7Vu0juQw8XI6Pus5>}qlvZ#bf^xwBsO?~_SLaLf2kVV z^^V&cYwI2b^-eO|lP*TKiLd$G1K zva@@n7v$b$RzT`|>lCE`w1`oSQ(W>MADc+|E>EQx*yZE4yK}R%UC>)RYO;J9st;Ov zWO=`~yKGgQ+RHuAXXk!D_m|l;O+oP8xpPj7qL7EzDUU9hDD630dUWO-b#{H@aJB3< zQfgrCrO+j+c*T9iy;{)F4ff494Jmcpi~ONfH5gx1zyd1Df*J#p4{l~Fkq{(_5dK1U zR7~+KUA?2Nt8zJ!Na7&Y)a`+ISe+8OEDnB9SF>@EryEh^&T%yqzM5C|dZm@nA zPC1|1@L7~B9I5e6(Iwiyj(kv3vWu&6J!x~R5bVeq2Prm->f2yoEtIT-%YCbp8J$`-E=bU>NRUDEi5wX+r0Ap6-sW+-qo&eqnQcT z9ig8AtW~8l_7GYRIwlEmNM9%|qBpv|s${a%&o0DD{}OrGo_ zjS_{<$p;0f-fU`eU$Vs4r*K(D?Xj5NPik#KZaaBfqPQt?oNWEgTemt? 
z{1Wke(kyWb#3eK6O9xyigfp~%>GR^^Vw3|GcHYQ&*+1i3$0%kb6-7?qYb1v+aB|`d z#3ixwR>X5^;fqzC?;RmANul`BlIClh?^THg&NC^?odh#(A3pkkJZ+p9S!I>DC1Q7( zVI6?%g#}G#%G53r>z#|=MPVymWtY%_dUJoYNuCEn9u+3~1{f z{8L3qtm@&>kdP2X`su(I2YU~%f9lD~CUSu^V}3d&hy_)mDnkN672R-y#oII&oWTDM zHRSM60%fQ`Uz}Y2wW!AV$P;-bF#*F1rx2`% ziab+#52*;WSX|(l7pSO41&Yd;E>sB7t2uguN(uVb%*!C#Xij_bgOD@IClF3qXS zMf??>sG(?M0Au6Cr6g12YFB=(UK)&pM3xoGDusunMcGLuJJ&=vro^>Abpc72V^@+E zlEtsZC6eMDj3Z_Avm;Ht>k5Xrc~Saa7QGgF%$fMP_*7|Xdvow7NyWh%cBrBz0VeMz zJok^y<6KPE{cYQ!_?>Mvsle;1#o^(F&|ewEqyVmG^?hnI-TAlfx#6=C^S4X}(&mTV z{B?-%E3yj;-Jm7f+}vFM)4y9UWxrpNONb`+q~ql~d+yw2P~A8PE*7n2P}|yd$P6K% zqAQrYkY)B&vht9VFJ=G)OZjC=+w$;UIM*;j#DJS5wpptMTP-A^XVBi8b^P`iyi$fr z2@XI4tOWRpM?5vvY!+^p7=i8J4tvrGFYS=%;;)k7BJ@eF=$hI>59H9HLuKS0li%i& zPNevy`z*jX;m^>JlPvcv$aXgxz7Wc|$j-Ee36F^TgwIVw%eG0Vki9Y#lgUzA3lGkn zB+~%dQEk8ug<*&BAFGp+f;J|~WW5OLAo*181lQKEfo9U=j7tJK8P}+exJliD?*D@OL)y}SS^+i*l~x(DsY0ZDagVGuG9r1%z{Bo8{*cLp zGLSJ-7S$eP`G>mz0T8uTuit#KCh>JYap_fI?~@XR44n7V90>4S`BM5=GhZ`v^YRaJ z=1q^Au>NXbfgj!Ac0-0JzS3mz*NR$FDKummm z8WMU{;j$8LbiqT?O@}g;?R7I4@=p5gutpuZr<`8Ue1m5kS~Nvk3cvt)rj`>Y%58N0 zcd^;K%bs%9;mv;JyCP67Ghk8wWB{cvfLS(hLo7Ch+I(gvLmE}A!qpE;s>$_p72sLD z>S?7cErY;5Oiv&Tt5-QbwG81BNh>+ZIo}8**6o^WdXc9YRvuBTPUFws{aIV;qvca0;_7}*eZz?sk24-r3 zahJIWvMv2bpsk(P+~KcGbKzHo_TS?*%PGOG;H-^9jDTkZv6U{O^(0M6bSk$4=Hi$PtJK3+~rYw&-TI>R5K ziThr3p@43&s3_@2U&lK;b|4{o|86_g#wItiZ;Q`{y48{*8vX^MZu+d_`@%aGV!z2c z0oS5Rk&geBJg1jXO@lqRz*%tN4$K0OoB8pwDg~Oi9JVCuJobf^A=kRcj~{+Z7U%^ zuKP-==gVon=M;EiU#Uh*eitcJ`c2_K=Bse6jMLToKNlSnbFZK2&I`Qip*bFCEWtn+E3F-j zmzk16^2GH`&B0YJNvap0rZnGhvQ0K5Piy#a_A#TjQ|o$P8j(sUQg6z{k1LHoj~EPD z{n&H;^O5Z+=As?QUSFR?)vu~yivBiHKK#1UfL1>T&U%WwNAe4m%~{l%`?t!h^G2DP z!W_eIUcE_1XU0;|>o~Qdyk4DVi62PC)4n{al)*~6&KIA=U1}1$pLECfb0sQgj(j-Q z*56E5Uu_h9rG$K=EwiceWKl+d=oBQWiZg<+h8Tu`@X z^4Id9Zug@OjGgpemVgk<-Dn`8>09^^g%T(q^iLi6F3t=)pIb&Em8cJ;MGyEU7wxU6 zi7&M@^~%OuLIV&reY%<9I$lZSP=k@o(lI4gPlG_ad*@t|{ngti&i+P-XHtc!QbTFf z65WR!rAlFi?+=>q`}deYO`S4~y|b0|<#Jez@bpC{mbS-BVSNC`v*>|vAfa9}=!SRa 
zOtcpiKXeAcX1|4<;}EIW55%p_?_h9`*VT!2u@2JoHmVNO- zJY{KlSTiL~PB;iiGP>kcFn}SaqKN`)ad@8$&w?frxFi5o^bt7AP_5e2g6xXyt0udO zIYnRn!ue_3sNz4f@2p)ZvqWgN68>J?ZUhQ>#d)}jK2#DxDk75*+hJELAGh;tG<5L# zXO~+SImy`*ovN>Uf8WJzLtfaI(Z!=QUxlvNl)pqs17u!3w5gh-LxTJc&2?tf2osI$ z^prCIGiX&{0+z(HH3uzl=-|N@WC13x`_X_Qrgb6nvBMTw1@U7jz_NUMzOyrEJt9>U zS;C|zj=F|!EhO3_swWcUPGi;Z>AABx8M$1Q|?nYDv~OkjtUOC(jX@DoZL zA)T;4<)kwyc8uUAacfY-TC{ZzkfXH>R{=+d39yt!C#k)$QKo<&guT<)iinY6|KtT;pWpX}7+2XZ0s7e%dX2H-3v z);rgcvu!`RHBYHA@Sh%e->B88LRSDV6r%?Y$mK|mq4G(?i^I2E$e6Y(ctj4BK$6LW%RN1Lx^UH!%3F7YFk=bbk7eQkHedf8CHo+PXw*$3^n}a(I3g31H)zd qU-)m+UH-?0js7?LzxIk0^$i-d!Cez?b~IByO*wnvtgM+UKKl=KI|yk2 literal 217724 zcmdqJcRbeb`#yeGR8&f4vMD23p{!(U$<8P<8Ieu0%2rZ}(ojYsWMzbq5v5*svCRiyPWj&v2wSi9Jcawb#n1^aCojoS2yN|Nexii@TjzwVBr&yb6u0mWc<2!m^qCy~6HZiymbKg|c5w z+2G8b@ee-6j7NJ`Px*c3xE~prqBdTCVZ+whUTN>@ocx~|y3MQyrMy$O>*hW0wGp!v zv&`+>M^CSQK}+oSpX!p|1+T{op4Ft326YtOaR2oBrk%Il$ydIg*rN=DA!z7wqAng{C~egq*M$I&40g^+I_b} zTHOErV!W4ZR961?i_tr-MhD& zZY-;Q#Ne@lq9S!u`i&bmlI|}5BR$>je8Y~Rp&`D=h7TW(pZfOix-gym`=E`h^0jHA z)Pno_EqytT9z9w;t(_?Mu;P-^;xE;_%A6cg6;;(?uR3d;k>n`4xR$hoBaS_32XDOj zUR=c-7#Nt+bpGDGdo5nAvc)Ir@}bPD7O%94lQU=H?fww=@0F>C936#oj+F-EbEF*2HP0}4ynTFp+!DW|s;bJk z)0FY_>C*`czLJygOH=0(=QNI01#*1<^pEPR>pOKH9NNMtq(?PBKc81npb|#Q^tR~K zQ4xzu_Vee@chkjBuTWa|DW+iJQ~eDw3)k^ZiPv`(w{pmN?t6ZL$;#H2Iv^k*;nuAb zwW!VZAIhYecbeXxQoKk`!nF30o)u(m1I82-6kNyJcZL;Q6F7pSw6eIkxGSg1xkpCX z1fO7FbTlKMl_o{$^lEDAfVL=x&!0bQX=`h^yGvTz*wAm@94zlM+23CslAsi{SJ*H! z^yM`Ho2mZlZaV3X%?u2&2?>!$?(H4ut?=ut@Z0@BJMlu%sg4wjs=#i#%NaTc4jfR? 
z)7zYqk}~z{n+?0dnN9D~k2Hu?Ed=9bPRT!2m}AZz?w%Qb#S$l|t74nd(QujT+qWUj zW98l(UfmGWytg+XFYwP3itVdTVRzO{}busu6Tn z(;r`wCD6XBVDc>XQ_Q`4T(@MLGj)tI4TPGtJ%rCJg*63#+2b?G-d%Jm;^W5?&BG5L zK8(D4S<~KLQ2XBAOTOPf4UF6is?Rog?4C$bCULk_yBx_W{_Q0$7Kd(1st>Hmi;H6% z?=D(rQy;BcI(d;r%=OQ_mvX60MCJpRhM|Vb5+`14V&Ch}+S}WkS6tlgt;I&q!EtHv z&+pndZ|EMG7PWZqQL_jO4W+`%Q8P0$v&uLbpLcq!evw&x*Dig*Ed6xt^G@<&!baJV z&8eC@;^KFj869l?nA_Ug%6?hyvB!w^Q~$ZmEMn%N$;noG)s23oJ$PWb~v1 zoW{0>YCnu-3 z$*ODE_qgs$_U|cn?%K}p)K__2=cKJ|7{cU4`3EeRT~Gcs3l2dhVS{}*xQ6Zd`S~wz zIP0(W@bFmAzGu_EdX5|`E2}5I-?yivrc!%dW|s~A^v~^~>ET!6($a>0TTSFY)_rWy z;=Eb1m5GVk!ongnER5!hudgqX;ONo@yu_zZpD2=YCq*qPqY*+hU;eqhdn2}W=M{OC z-#@=z#tM%7_Lh2d`a@ZUSxHWD?OR$L{nkfjEVhkt$|@==@uhY-_4aNT>#hRZ1nE=P znNvjA!#8r#xwyDA+Zq`e*}qL%!OqTp<<6aGpRc{ueSP=z>0&hkUfrYq-dIz!5+Um9 z>8W<+%$}U%<>5HdG+(e?w2FSSY9XsP6mHsvP--p-`J14rpS^phf8~nn_fLlv{ASk= zH^%Fi-8EKG$u+M))W=Q8^c0<1BPN#1-jGS-HP-$dD_B(E+-T|Syz|V4mX?-0m%hD+ z66Kh9cz9@EP4-q?R1R5f)pS?U(M8oP*R6dbaBi9!ALz_f|9O?UA7dS{ad9*EDphZ2 zo$P(;OZ$%b{{8z&Se!ip^MxuEZ{O-XJ@akwcE!*=_lD=ssTsB(dUkHbDoZOX9trk{G@`g8KQ)B7j%d>YaFu%0@V{=OtJu3o(w z2Sokb;PZjW$ry)s4@e$KaX#bgtIQiKSi#WQ)n!t7Yzh|A-Z;_-z z5eScsjWu(@@dTF%y5W0o-x`j?%nbsugNG=I7$`qCv*jO69zpOlqZ zcjj5`@%qBp^6p)Hahjvj;NW21iIth-J|CtcIIJ@ey7zGIuPg6}j{s^(=QEe(of@6nV0 z+zOL%?q1s-FgLxe(tqyikn@{+N)}U3ZGBk0=cb3L$BvbG?oZZ6&@qddM_=D*R`c>D z&B>D|-*$9FBqnkQ8)Tf@e)v{@16NS!<;xp`Lqa^*OAdK@7LK|(IXYfg$0BN37fBy{ zf{OCCxmg*9&TabR{$_36{IToCcQSL_tVuRO8cp?{=+1lmI0R4t%cq;7Mq`j=v>{Q> zt7g3GvD;uBO<0G%_ubvj-3D(zjdkR<#jsG2XTqWw1X2(Zc_k$g=g+SoAYyzx8htQxeX*+^N7-B15W^9IdL`#GDY9qHqwMTF+gHpu`84goBBxiV z??-gaObo7F#)j1UjvU$eXZ}a+1*RR0jU_I9qDupcJMx?9$zq-T@nzWTj>R=j#j}yy zohVy;7V8@t_(Vk3vxpvh2COvj`EwMGRhD#FUCEpFc4ky|HJo(n#3rPIKPM+wzH4g> zd-UkhXhC72)WY;DG2^Fa@{?3oQdCq_HUSA>Yps!iI?KFvm=-%doGpIW(jtEP{hsXO zteM( z;==feD-Tj4W4He9ob1UD%8klI`&_&uRb} z#`da!-!dc~0@NJp3w6uDD91;K=nm;E$P&c$8$n*T?rdQsi<;lhBKA6~4=}>a|g9`qh`t;A% zuxUUrYrwzGQV#)w7)*;C9fWF@B&4OEKXDr>c+`=izV42KubvJk7uUvJyB@PUdjKaT 
z>~^`UGX3VRqFv2|a5|NcFMicD)e{OX2gs>UV8b}lT_;Ls5Dn5LQ<;QmUi z+>*KY~7X^4rcXW6>-BCs_FaDG`fdmrc?6I`@6#%;JJ9-XjXXl7vH6+Vzjh z=c;KFo%*&m=^i=MZWLwF#XmnS!rb%6R+Hvrc)jLw<08y$t(3GHsc^D?)j7bf@!kqE zoymcLSgzzh0>6I!vRsCesc$pPE6VKG{Km4x-no+<7U?dwcdxnB`E=Bj6rbb=X=%xR zKPSmr+Dob!eeJ8#ye;Fb*0kR?aBycTBxNRe2QAt)K!lM~~%Kjno zL#APtf&ky0J#H(p=?v^gAKNw6<6O`hN~!?x%eeH42re!D{^}9;LRpr9p8h<}9qmJ4n6vL2r@DlQGw zMr1whS;8hJh+D0k|M^u6k12xlgP1Uy>z$LHzrJ^!XhO{U)K)c~;yw&&(H}o3H5lwFr5j(sgKV^{&!@ssSQniK%v;JKlp!9mxh#ekwo{#TRIqA0EPm$~kxvp()Q?dd6n zd_v7J=z9mJouRenb$0q#Jr40l&ZRFZ3 z*oq3Y^}2}3%B@?sHamB9b|zUrzd+Wds7N|GI(idcU-L5O0H7?b*F8nQa3Djg`I*m- z6AdUjD=4U@t{xt%C<sw}2(^86opd*jMX~ojy-D0rbr)MXy4gqY@DD{G17Ez=-0QQ(bk98WNKbZ z(Boa=gK#?lCk735VCbi)k0IBYb6iYjxj|?sVBzxA@#`NI9gJ09b>&z?8hLOkp zKGGCicD&qMJzm&w6C2wF;8r1#UAuR)1Z<2wJjWA%pt-`&2b*N1ldf@zEyFO287%}U zAl1b|bHgmH5B1le-+kfDr4+cZmV<+%(6Nmcr{;O7#|YIsu@jv187LUP5QlE)#1sM- zjFZhRExBX3ltlNeuJX1Rv#Va%gq}l5NlDq)Jja%VLEQO&Vf>3_7@z0!)tsadE8!LS z<$sVOXf!o7i%N#Se%%-p6cq1uc4jnIjD`k1!E-dPhV5=%UjOK;)O@X4ia}XPHF&6( z^6K5YcM>Rs$|J2v^dEbiJ8}{iG&S$ z=uvQtcKtKYsz0;0lROqsG$UhTj1nGL;uHsm@>5uMpJoT35*cls{`s>3h`Q~9keHj0 z&8brtanfF0PDdTT;GObibhICZ)xM{A=ecOAa|O1qc(-j+Hy4nV-9k=R_E(?XOa_@q3Z|a4xVP(VPshoN;6&6R~59;$H#|i^=jeP+Y&b6 zrZhC0qp0wD`v7K!C&=!47DnF-oTs_SymRFhi48hvJHGW-uL_f^tgP%irc%=}Zz8UQ z-P3S&>g?$mvA?y6uzH&tgtn4)oK~l=GJyI!S3_Ohi;+*%l9RvQ$oU9y?;WlUGQm5L z#zakBeE|J{aiza3!PPhzmah{eu+?`)6Mae(8RpI6O7#UEkB*K8Z&0fxbQG5H!kdAB zUsG26{QQD(UY}dk#W^nAy~{~*)bZoT!=~||8PflR0BQ&Rp4jc1u5=|<=Sq>{LxT%| z2w{@>4|(Lg$0LDeqk65(%DtTzW_w(^>f<_|#NEUqs6=RtwLhYsSg{ZJ_2bL66diob zIW4N2wwyOr9^f2g7(HUs)6;VusG-ibsNy8`^Zx#8;wN9Tl4A@KgE1SexMER61sb{C z4-Vbr9gPMMe{pAT02^SQ?jF&ZnVCGm7N=|UPoF;Jjz7S}dGl-gPOZyaO5~NK+#~ip zoPBph>E$(gN|JHD_2|3w%Sq6?Vsmiv6I2Tm6hyeIE-`tDV_A2Tyl9m61A#`~%) zlAVyqgS~bg>7bm54!>e@_G}qcftbW>L&1!v_`ZOP{^LtF^)^HH{p)T70w_Tt*>FQ}!r=^e-vU&-L?3II2nFI_!7 zHbBhNRU0;JXnth2TUjyxXL|035}XEs`+7nET@gWxbDGJ=EG)R>y~hRPM#Kow&=JnJ zOc*n9gOx0F=`(i~=qh&ZdHdMz2vGgZ$%&~#u!T27k54OBj)xyF%FoyRRc&;8<*HQ@ 
z0rQ>vY+(BgVASg1mS8b&`D#Z_?PXX?p8hpO)SgAYp_U+rJZLL)u0H!P^ ztmYUtZ+1ls+nQ@(VOxPDMtgM)B1`AK7Jaou!W`W~N3O-X@N26judbHjDRFyv)l1N3 z)v8s-W@bYG2eiXCuhE~Vo77=mF4@?uJcoDN0jAX-JvkRW@IEyqg>mw_(e@I)EhiE~ zKA?95nll`m{7~}=(A~b}J`HIS=DYnbo__!2JTTwG>}$SMs}FD5+E|HQ36}I~Qg${$ z0Y^71lHd_AGuoW6%TD`l50{*026vYuwv%)k6mEdVDc?+_Wc#jyG!+x1O2!9M`%$_s zu}Sw24hEw}C4qVqw5U`-(;WpOmv*zJhf3FPJMA1Z>NFg6T8=z~te|P3AYNuA3k%DW zCr?Nr&p4XPc_%SZx@=r~i{hZ1c zeN_lxyvoh8uSYD(U4A@HD!Y2^8nW4W6hoo7u`ge&Q8n#*pO#+d1vke#Uo5^X2xOe| zJ$sfmaG#RzH2+?dJaqca1O0wKzNCq#g~;Lbts}(JqiY}XveFL3>+RsFuC3*>_&QzT zF+xWe&lybv!@BU#|G1nzE4$RWGyZ^d`*lf^k;^yoOG=KO3b2*(SOo86dJGnrXIo(Z}08?R-TX+NowH8gc2?z`U zTK@tO#U$r=KKgzFV3rL~sA*(Dzak#B=jUD=^-J6M6K>v&+Wz9>FuRDo-a|&z6@s~` zUV#VTPhb`(X}rGkBH;vT!q!bof=GPo_md7U)c`7kisMhnnvF)RD^|Zqn9c+P9o)=l z{`3sb87`^ayEVbhiTg|*L%+@2AaVQFt(w+W1M!w;s45=sjRMWkg^C_~l5~aY#+57F za>GYKPe`1dv5#hxKEQWRwXXN#^IpDl(GDyJu8RnrdDMDSkGrbxo_p{;>UHsfS(u< z-m>EDn~mv>lal&vx|vQUvApV1^qbE`mzF%1l$QRJ`RU4T85s=#sQ#g$5cFR(*X<_G z>G|+e8McnmAp6Fp2c8+#8d^b))^4*{;>^a;q7MLftjd-=u0Z_U6e zrC}0Erd&XWj~@la{cKs)r6m zCw1bG3SQK43ig(bVy9!$Xu%}DSj{vO$Uom<{u51k7yOEC0s4S*a66_mv z6i5CCJtZzf=!$};0RYy8?_6Zl;@>P~|CWOEcT_NFZaS@|59pIA+bmt1qc!Ka*@UxU zh7OO9a;9<@N7Q9b#rdZ7$@dJ~WrtQIVb8&LNMs((jR9e{k2jWENJ!{N;(^k`Mh}md z7x$kY1ct3aE`0ak@GCC`>NJ#0XecErUFk>ep<755Z}4v~f3qUeJMTey`VbiLTN~pF zMTLcF2?v5h&(wG+IvVh?-T(8mvF6k@^)Vb-;`jcbS3#QL8P#(BP*&8UyMcc*4cd&5 zy#ez`kDTaK>Y!U}Vq^?OZ!=UE#SnSn!qtwA8#gwfj6LR1R6PXHM)fuKo>EY~MbMx4 z^v^#0o1^-7rC{^EJXSYqZ*Mowvs}R}Vtg(yPaI*tR?MQZF6(ISrcIkDh{!N!=v)kF zEM7PJJz(|o^HaW)Yf;7gXh-}3^=pWvrggglnMD*P`>HOJsCe+g-QB$gou{;nFcUVH z{?Eq0T4v#2lD+@ySF!G>p@9M2eYyc0wK*3QHNiJ@7T8`)svQUZb}Y$4xBBFr_|Ba> zeUwjH+1P|2Sq%_=Wf-`mhJb*wH^4$vwt$;#Q6sex8wpMrEfCUwV1Ht3H@8eY`uWV7 zz8_!O7!C- z3=9mUE%N%(8hb-AXpx=h4;eRlPBkVehvZpSuMDGFLrIXZSMmQ;H&KZ)>^}5v^N-0g;k{ zgMz2CJ-(pbu!Jx{&D4HWc|7LPlP7zsH$KH$F?(*x?t@x#zn|4+cctCb#-`XpdhFf?(#$v(D}qIwdDtap|M(woYY;ii@;IMzeINKv~qTS zq;vhpv6bg==()wkbryY$jag5=PDq~H=fPc-JG`V0f>9NuE%i&nnLh!OS-FkxMP#Cx 
z$>knE3)C*Ulrb(eBt*M`Y(l+%y`)QDB~#qCZQF)0+ns&&5A} z{_`{Y9m=g!9SHk=h5Z+Vl4uTjBb}dsjhd)?@x>O#-yS1^?a!DQm+^BjG<0&x9r z_SN0{FBNS%hz1cPd1m&z0^5UoejARjTWu&@pv z3hehK_9C;KW3ttGV}wRR&PK$pic(w2z;KOyo9qc0a7N2?vZfB&5ag_Ww2zu#^RD1| z3G$K)pgim9lGS`E1eFEy!}@d4p_h<|-am0uwdx#uUAeUASN&o|SiDmekY}bvrGFIC z-G!K`*dqOg>gp9_iIM1-8u38U8@9?k`{$nx5Yty(k#Gt?CAzB=BwE$;m|H^xEzW9x ze}Ae(SsR87oOaCJW|C(~Mz{0(D*6)wKncYwE`j zb$9oI>%6bg6t5;S5F|d@VYK|gplRE`)}S@Xc{(JHej5og9Mn1R0hL#|=V#u88s4!F z-TUgrX8@Wsfo?&HMq~Zf$(^M&4YYVwkK)Y6eU{Ib_0VSbstVbEV zpO(f0KL>h?&Dl3@eD(zucNxtTZSPS4JqiC`u7~dJ<=DU`bq=`$fc6_YZX(C;-MsJ9 zXT1{u)ZzGP;W+R~>vM8)GW65wu%vXLUy5vi)kL9hg@IJ4X>O)RcdZ8H&&t|57!~AM zU!MiffY$c0?@+OLS#R=7OEcjG&>KYgvpqs0d3o(%nfJs6Ij=9IIj42L#39cNnH`MZ zcJw%|V2tpZmF+*}1DRXzyuL>r@6cT+ROa)Qj@za%ib#Wdcic)yNCF9)us5KrvAC6E z|FTn}?ynI?>)`y4u?R?zQQoUx+Ly1&5|@;cszWNGPTY^fyvMcwoQer{!fj?$x2Ssp zWLgr#7|8CP=Ec@mZA`K83=gBTfZti`ynip^0+(O5xkdQj(kJW3t`@0O? z?9}WmbH%HCaXp`^%HNanfF>q7i?770`oRnamusb_rnXf+mQn(N*#9Dy&%P})US)8@ zl~{hasea|1>JK;&_00VI_tVqYHM+Z3j;sMG2F0F(97|-i;bFjL)$sM_amHE(^4HaX z5^u}2b9TOD5jdx=V_xKFDCN?7Z0f0|y1G!|qpU3K`MPS?AJp5BsVvHoifql&k+Y{m zL$ND@hb3Pw+!1~U0!BfApYjY=f%wWJ2lzKb>QsB=4N)%`nmesr>-?@?A;-i}Pb* zFl$)jSOwea>gxVN^m?@RB30*|{vcPqmKvF`7Qyo|sHdxI00&goQ66%br&MS*QO5wx z1mkYryqS!B2tK9i>Y7(Gx(+${x&S|L7+QBO!~;vTwi`Ot_mu%=4(m}DjpW!gtQwkv zZ>7Eogn8v9!{(Zr8a0NaFd5|oQaSZlFZ-o*dtDn>qbl+4%zyk?6&NQ@wB&CSO)Hv| z`y!Zn-P?!y0%@Jwg$9XXc}P0d1k zr!(>AorMkz5K9R2d#NP%@Z{tq&vGXZ#I(pkB)}gXWXtGxpW-l7A19aI?ydTu~fJNXovA+tb1IOxE(U{9-3Fk z!I}{YdEkO*-&5aR=u6{sTwvG$x=2E(%rRLP(+-+yQ|E%Tguu>p{L|!Q&UApGrhuJN zz?Q9BhjObHH*+xY3lM?@vJ??H0D9gumsxiNqUUw$%u^m-BwWBmUG|;5V(~kU)}y}f z=;oEI9rY-)q`>ZN^If1IdzB z$=o*+68iBuTz!0wvYIPmGZ8-Z=U1(veg2_BLqkJ+_wEK@p9p}xB)Es*fhua$J^-Hs zkv_pN)B;@q>{B62?2(sGMgB?H9Bfp(^kXQD2J&<<8Csikd%-M5i$bap?>|8shvT-*+$u)$k1<<){Mw z5&zsMYF)8m#aSP{{I6g3@*+hG ze}4ASy4?`cUmX<{)lIitKO2Mot>N7Nj5!QGVYAD$5%UdVGB(jadL7wCbpY}2C5eb#f# za}dX&Xwuq3Eh45?@0Gmq;}a9o?d|aKpP z2)pKp7r^Z>K!9 
zIR5o43x8br^T+Mu3;M7E^xwjd#NrQpm*+19SO@952Fae`I43*1NMvGU!Ozd{(%IXB zU^9UJMr;O*W%olt{8@wPrM1PIwCPD%a2>XI2gk`<2!=&D|KP!h>oGVuI4@`^kc51@ z87M8N@DJK&3Cjb{x4NlmBZ!hF-SIfTe>F(PMwlxz((K*epe})eQUPoxyfG(gdmZ3w zA^R2*^7OV5|C+JG-jjA=5w2McRIOI?KDY((KWBb?iGQHFibCvk4k|T32eH?$Ul;4y z)>4?A_{kcjCDX_Z6j_GB!_AxuXRdVQQ7FW#h+dfp%0!d};t7uV0-Fo5)j`t;kI^2u znVgSa^{>bA*GY(4+>o6?=aiZTy}Y_D<>gvL5qZhG3ckUp8-gP8fEU+Ak6(cJv_G#G zM4S4ni_ui8g1%=0!RWt!fG^Y?nN8pJb=48yx00#TUwrjOlyVEm@XiD z58@`TsHn~Q#B!F47(zQ`(fdbk;ZH)MqP)pRIbLm1EiW9Tog#Utm z{hwT+-7dX#(A23oVt|{0jPxs=t_doksgpS}hJDry>}|_7VSxMqgIek7CX^EN`1J&7 z`$z)KFram)!wF?<#2dN&_+lvK)G6N4lU9?4-T@xXI4-l8n3$=VnL5H@yf=a<{i*4m zn4kr0b2V$%So3khK4=A7N$~wUKnT2wipP_)Q5ic+T<-bH-9!0+qJ5l4XaH-o$Nl44 zG!$_|f0C9Spu+Fq=SPjGZD?4d|KL!wwZY$YNUZUAMYVh=Xiza!#cTGVku%JB2fq(? zpN&*0WS;3(p3G=0R81e=g1NN|CMhV{@j2I6@Obb>=$=8m894l@t5X9)7H;imYkLkk zR((v#|JNo^fU59C_V@Qw`1tsU5fC^Fc#j*X@@lG8E7)tI1t)(wc^Oxnl_oV>(zfv#%%VWD@G@wipKOM!V*OSb{N?lv>Af{j zdtE9-tTkXi)QBg*+WKmGF}k2=@M{xcLZ4YL4EAR=lnNaxd4#WwbCrlI$L4u;6jP{*9=YdK-j>^8ch-V)J zouZC*w|{grA-$M%{3GR$A3wHaO9DX%s=*{#L`aARen?u|-KXDgdb%3QFS_rm0+9Yv z#bzdYj&5p#mBKxrfOAd$w|>Y#RZe=k`_z_G{)t? 
zyJbrP66abk{Q8!G;6TO@uHpnaP6LzdGmvoxIb8UenRzZ2mI#!&B;{QUE3yvq2r zAICC(v!D3T3xw00ZL$mOOAP`E-V0Z6?|MY(IYbdRyw|o{DX!31iQ5>NgLoOSAnP}5 z(0zKw&4207Pon-~AHs1)c_k(F9OOxHLZ)GAEI$3=*2FB~^CGog-CvFb1N%0#1ho4W z-?p{I*FS$g{DcGc>M(DL)@><=l=d=ID*Egm(CrTqp$jg6+zxP{_$Pt~azKPIwtt(N z+El0yVU7LkMaq_+lq!0fDAp&zcYvFpDhle=c4c|1F zViWcwj7z0CaCEDwz<0hZq|t*Ap|je{aC%#ikc2*L%s&Hox7`^6VAA`R7QM>5D4wnL z^%@faSU%l3f5)qxviWvRoDVUOLYTc4m^V#8Eto#6s;ZI#joi^g-_w+o#C>U8l{mG2 zAG|K|QokJCYzsD4C?FBbmxjZC1t|1r-s{?0)wg%cI=F0qV8w)`B_%gsdXAHiPM_`} zo)KP514jpkhflUtXQ4ju6yZ^itvX2MpR{)FL` zvDM4NLqB5|S_I`ppyQ}w0dJ1>ckiyj(K&Kv$rh#$0WVKagLyd_$99#>i|HxtnMn?WJZA;6fRvc}e-ck>{xw0>N z*XTVoVB_Kv%jcr|`QKhIlJ+5JT|x&XXCvTzE5DmE08fN~9ecT?y?=i-D3;+M04g@DK>U)k zom~X>)!ylMLFt=aNy9N zR5#0LC%Ub}k7lJlBX_%cc|AwjY0czfW)7$0ko#zV;Swl%db>AwbjuXbgCRpSfNyR+ zt=|*)YhVqj5@61WmQf$QB?PZr3-^4`pC31hYZ+e19jZL^xh2ix!%0(9Hj)N`QH356 zFbQ>Y0OK&Fy)k8|>*vs%^MFY9^u@poaakh4roi_U85O15-Hymo%caKosm586X7;dx zr$<=stOnj!ee7az?kQ=asJPBe z+g2A$^px0u+?8e16_h6=N?f8+kh#qSjEfM|K7DqOr!4mqn{N~it)W00f0$+;O1M9` zXOAxaB=Yd7b`eO2Ys0T{Nmz$!q&rd0p|!Ne;h?o83Lw}B#ztN~zGXwTBpPJT$@eD7 z*;os-YH2Drk+)DC1|Y&GB(UDWn_ID6c=1AuNT)x)_C?(7U8*!!S5r$u5Jz6T$WR9< zgqTuOL91agtFf-cxez7?s7)Gi0!$G$LLxihibwQjel%IJdGqGC`8Jx(F#LVc2O|ESdp)v{OKJ>TOMdUwJn zqgsMo`8NOK)UpHeV-YdXG@^r$@Ep}CY8N2KGbQ-n;mQ;h;Zp+A6j-TF@oaX3D7Oy` zHeuu`Tlm()W4sml&QObjL4pQwk81Kc)5K%@>vQ@&MmcsLIGM4RJ3NX1e+@)g+Oe}| z&IrDC2}G|_{UH&^0aF!mDkJ`AfG}K@Dn3ns4#fA=w_6JAn|Ygk^BUmX!E4mQ)1Leu z1Qh%%@k@#>y$X~q-z)46YZ1KFo^yQ1bo2@0&|Ef3Tu=3Y#yp(gY^;Y}TN8j=_$nWR8s4%-4h!-_0{u4J2-JWeZo^KjIeW)wXF&b5XM zmbSrJo57yU2>{@LD+W1+WK2l%ry{UPVtJn8U_U!zj*^`Vwb|%^+e0<1o?ptf}cpvdF0sT1s_d9N6W}tv7es#aP~_fXnhN*mu$- z!Vl@bBuN_?CLg0R%91N71g0*VI2CqF3do%3_5Ore7UE5Uc*v9C2L=s{=gIO!AYdEp zGwsW@n6&#sVU+W?g^(%GeI3~M?CiLSCC7V7w4|(i{rAj%?Yj&P0`k)eB=#rn!?gGT zp|Uqn%IVqJk0rUCWP%A7Tn0QxD#=e#rGx2J+z0V;ND=gLcuI}C(G&_ASNQ3|R7?U9 zh4D28(nv`xkAD93+Cg!<`I_$rLhbwsj_$o<`D_dfd~#(`F)?(Gtr_RR{iFbD%l2+` zX6O;1q$Jj6Hdgu1r4;=0v@ksf>77QyP-54vS{yE>VjElAH1R|oWd=VN6Y_>w6<}F1 
z(imL!MSELIxsMB(yu6~uSI(Jr3jIUyh*xWdE|!8~>EslR5VYIs((bVX#xIAJ2O#80 zz{rn&+SoSQ`d&y)ZLMJrKpeihpa08JsT^>6GjqpV^kWHVOTqO`<6pmC25t~;eR-=e zX}`6ql%4iIlmq7VN_}Xe(b$?NW8(8vus7@#>!Q?n&!AV*(D{kxeOmhtqUGWnhr9Qh zLJk2ZULFtEUc-XMJz ztVi5((f)yBZ)YidRdKr2`^-oI$hhtGk<%7g=K9E)72kf=V5s`IR4C<-d;f=v&=g@%z zS2rFPl_{X6peD&4>Cquz@CrV);8B^UPoZXl{KM|e@;uH?)jVN{2}MwH`Z|l?jWN}; zIcVYA8sH&f14iaZMT~(G-(TF*#t7e9b3l_5){d5Dw+UX?)vSQ#XdOupZY#1`( zeYp(viwv+R0|I~h;M123ZxaT4Ht6Z=d*rc%k#60-w1A#AF5d36W;Tg1(R>@jHYql0 zx9pQ?<7e+Mz~6Z^%iO!e#>>I_{`uDupvDZRdT(648jRd+^|iNxn3OOhP8iKSva$ns zvzIcZ`^G)46rB2d&j1tM{}6ijK|oU50>HPZ@Mm7)RAfV*NO?8wXFx$$K@ z@8t){`OiTulAD69L1`ku*D`1#;eRJ0uj05Z4e8Dp- zn}M29x-nsy>zqzBpEdl;dTu>->=>18nA_ct5IB+nO@pn*IoR3N(y87SqDiQJ_3B{q zDjW1RgzUf=MYaycdMZ;m+~FrvhtjEH0yB+A>&qQmg^!8EQoUex>;r_Gn02oOM}l<@ zenYZNDyB_>=g}La`0UJjSDJdd!4Q*BZyy}q$`6Yr81pl;<6FRqlCgLs;pIuXGVpMO z_&{6yz^I3sM_BlP&h;A(^zJrD83{7Zw=4@UlcHsQ*5#u3@XP;(zCK|XO;hc9@7T8Q1DUiWPWBU-%}UO@t8l^Us>4T(5%M0;)>fu(|lzY=(grM5!-@F7wGtwAhV$3%K;QljW+XwmT)O@BjF09~VK?D8f>7GC@xR@VST2X*3>ZZ_e)@;F-z{QS}EvYWxc&s92O z{(~g>WQN~(q!iVO9k=bu@wCBo51_JH;`2K`$9Na5VnuZBQP5JPO+d@0k_Z)nOf)$b z3Gm5Ghi1xadU<(~6@iR-Jb3{OXG)?xkwW7zVv`CsQ}%*B2b4jqlpL&9qGA_;jiNSh z+r+{mAV!Q>^(BdhxSjyEZo=`z{{TNr$bRKg;{u)Ku~U4!{IfqCs|$cNtPxdgW06$n zh^e^|RFvDdL3JsI7PVvr@K@-9n>&lwF|1j$2Bfm!om^1gHIST{zF=DJN7ex?$~l}Y zn$4SWfr5~hTlAhL8(MaRE@%5X=lC|Jy3!D#PL1)f_7w}lmD!%X0nQ6IURc!#;jl?r+ zixo63_kLtLW)~>NNQP8UT?CIEAyxvkS;vxl@ssh}`6C19=DXjhF1GV6hl&^cF$AdG282#<8RXCA7Z_&%2@bP_+vI!M=780b;fl~x^aNkc@1~qN zdsYR*?Fz7xxpVX&`!r)+NE?cwHI{(AVi5KLZ!hNDE7F%y;@`eK1@o$yaaL4RBAMv&LAQB;fR&0o))qK*ztP20}DSa(5Dw6EKTm?+?-Tu_~>h32H@YejYnL36Dkh#@pSmUa2v+?(PfqPjf<#}*r|g3?ebS}vx5k$ z7ZnBkEV=Y$?VAs;-TgSn;M)Yr1J^)`${dzX7wD<*^anNwI8XfNB;j+oJ~X>73$8EX z`eBv#7J*w<7hr->%$yxRM1}#;#j9&J3TD{+Alajt8Z8x%shG#$S_%>Z1-&}RKo^(G zm}*^nHI}S6OuDHkqr`Q9Q6^sdAf&#OccqXMn1R)ew0h;B+>qO7&_be@Yzrc-w^T98lKpmw67p);i5nRif+RM{%_n$CD`Y3=b)n(mgD8TgiMJjCc#I<2!EkO9yLKRr zjNS$Q`EiukPsq^Z;ls}n;omTyMnz5i(!y`|?%iRtp!CRLu>|c%4hy;L1>{-H>(}eS 
z)Zo&O6?j;_NY%E|crV680OrWbqnim$r#n#nKRrx#(9$AqIbnv9GHi>JW_a@o8Lq@i z+V;$1E2w^u`xeGRshsf>A>ftBJYd;-7&8OrXLP&jQSOllR#hKwsjjYe{DaSM)Ejh#1?}&ie#Nd@GD`yl^CV^NJ9fGwFiz2SYM*S=Y{#3Z0u6{w+TA|$gD_|O~vK_9)@&nmAL5ODJExnya{zK z`0Cb`DcA{{y#Put;vq7fl*C595f2Wy4&?|>1A{mffo&AnSk_IZEOm#4hi6JZ4Z>an z{@h*}DQHtj@0IfzaUEPIPK?>1R1}~aHZoEKF3pFV3YB!{vIyb=^ zT0iso9aEzPCPkijzl_^y#tgfFQK?dwNy%vr+#n&MwGv|zOoWnH>cJQSC_Tr-+X#dP zj0nT2Yx46s?)CxS@VZzLT^<>}BK8(sLc(}m5vqlhiB8Yr!uKN}|Bi`Et3a;AbpEX| zZ^(X0?HR)8+Yj{RqWXv?s31e|bx4q{rq`|RkK3@^`h*z+5>VCFe#PA7@RdJHhEQ^K z&_>oY7Y+Z^ z&z~%wYQjVP{pUdLvf$-j9!^N{^mL!~(wC77Nl#C&x7Mj|Y&;-d72u6W4?{^v0UI#K zV8D}{m35K)686}@#14o98l~k5jq5?~XS0n!^(^+Y_-bi;KTq=GF>kl8IhC1Mz}-u$8R@R=6YYqirjX_}SwHBS9~Ej1@IW|?JZn9=`tsDw8~WitI!zgPQ9n`EDP$#UW|SwjCCHkA$kdXH?SK6GHWf8 zFEQegE&#g#xmcBWVbCkXE64@+p~%~Z{DuF(WJhisL9HiOnUGl@BsM{#Y*u7h3GZ=Z zxcJw>a@0*{$cI?CfEXOa@;$)>n>p;-@Q2KeZpSq+AQe^M{3fm+T!XUp0z6>?=JeDuB?Gqd`Z$Vq)z6oNbBl`r` z2e}I-+gO4)@Bo2Tut`$ITqt3+#IpobB(a-9Ck};hfyT2L=;F|wJ@mNiK-Tdw673V* zKSIW4aaiflV@JS+L<$n}gcV?zthR;_OopINBZ$!q_8c^C4OlaB9~kt|gxj}kL41?C z0-B&1rq4&(s>C6Oi|$rocHtT7(z0X_xWI~TPYdb*-T+S%j?$KlMX+zr-hxFF(4u{x zX9WuaD+!Ascq%b3NewzL0?af}VJ%WV#8@&Q-<)$?KK`rv%an(Zq>Ibfm0M!Nq=qncw(JrLx(%KdB(yK5L&)F0nd|R}2=6>C z8L_bHTtt!~*SNqqm0?MWG|!f5Ts(jlw#`~iZII0KFIC}!37n~f=zaHBfVIcXG07@j zaYFj|2Fa8&@s@v~VDnKju$iFXTcFZ~>6${u4Z78`ZiArmlKHhn60zipwy7UKZixo=r|5~|%s|Lw1AAP_!Fw#`ec7wJRVL;{1r8^4G$5=-8}Vwm8V+f)0G?UAtSn zicHCn|NaDRvYYO_pc$lporJ!-f1A}vN!P#i{_jsM|FdNext!8be(isJkNoF+@&DJ) z{{Q&e$^U%*|3e4la#{WRkN?}3bp{v`&rPfr{&FwO=C=3yeV-G^ALe$ng^u!Z{yg%l z|12yNa?sNMY51=jLMarqYDVQDZ(xw=9bkepBu~U6!s$~4@;*rmDC~svg7&Kj5wZgg5r;mOxuH!_;mA!CeyP)7&kW=dDJaBKE8}UTL-Ggve2Zf4Zdpa1P zMlzlCETDa%MeQFR4uj{{IpW$^!afiW5)=XAL&0rY7g)q%K$Q_1X(?*t+<)9UxtvIF zVO|z4#1a=5r%>)ccrXZXy%uAJ$QZ;_O_C{Kegha2WHgQs+A$@U;{#!Mf{&q`7HfzEMl=bh_(EtTYxV?ikAszcV;;0L;L z-oXosD^blnyWof?_gF#^u7%?b*l`qE3h7}A75_KDR=!|ekpd+WUVk;J!PF_yNzHc`_=8^EB-p5)wiWCk)sGGFpbWC$j`-k~|dU$b4k~ 
z#KZ>fp&!JS(Nc!Kj7%ZHO?Ck=+(|CiI(zlbGBQ-+n0J9F9*d+FAr385JOQ5`eVGoNGNp;*Zf;6IU8M zvFBrB!7Z|r+buA(aB%pnni}<`e|g&F^LXD7r#cvzlwyfPV;ZCJ8%2B#A{DoofST8w z_~Gfv7&LtZ(SubX7YdO}%#ee~Xd93ofPXk3Q!+@TE^mKu)7EHyaRkp92&*H_5s-#7 zHiUek4$}h|Au^~qfXPsDwI3Q*BC7!=3ii;o5MlxW+gTN)1h)dNFCtf5ikKCz2eG6H z7Qeem*&7#rga9H~BKUDJ+-gjxSz^WwUKboTdgNUd{tX-B6 zM~Nv2|mXD0z~>GqFCWz;8CK%7*h7Z)@hV$ehFZi#CGrxcR%(L10k3Q0EAE*lnKNnv$3&J*Ct_DK-Bqh z2Y_WFwK(qP%_6A)R=~`X?gZ_fq`m@bp@*BL4=R??z9&-+jEpLWzK6%abO%LSFi?mO z^k&YSp>2m3Bo!=VMXf+X$+NcD3zkBDP$7=G`TF|i`;OcGp}s?_rl}D1034i#+JQP{ zum|WJQTH7#+(f8=AlcVKeFg|Cj-!HO3tu!4l*KQGO)Rb6;xyz$h$HyeC%764|NNQ{6%Y7{ zU_2hIO}ypZ9h^KogtYa+!?|YI4ZfKG+qNSXAk^#sZTw;5;y`EYW7=8xGiewYKubvh z4Hq^G1AH!8T{Y&)3J(tacpcE_q(?=~3D+W8Qp7i1JOX|g7m_WFo?%(feon^O*}1df z?mXMVV`mUp=@~&Z2KxtI+|hyfL}3242Ukc+)pE=K%|+1MlaM()&@+}QdK>3R6m zWP}B7jg`3Gt#iK*_iRlEpDyTk02M>xLBv%Vc8S;Z^%n|Iw>!iH2M2rm=D@6T%MU0P z2HKcpJWYYR6^Luzo(2XX#aUIQ1^}{>5vBu%NX>Z*GOIZ$-8)luTOI;>nnDTGwOaTW z1v!D|+WqKRMvgl4Z1RjVwp%jHmjl|M`7i|_B+q;S(-gh8kX$gT8_+rzrPh%FzM`^T z3~o7%-O%x@ox;nT%p~Jd-@IiLs`u#97MybY5O6WI2pgUwxb;$ibYXRH;!oX-jf|Aa zw77mA9}t-pcO@CowOjydAb)1f_eI|zJhjr#Idh|Wt(Da!*xP2|`$$vqUJpA1KZs6? 
zm40YOD=EMS?&E3A-=YftJu8?#&9XawGGu_~uU+InS$$^Z>d!uoj>)^O0J4QF?zCFe z^LgXhC*d$^uE6d$YyG6~*GNwy!2J;9HuwsC@ZLYy`J#SUwJOCH$zLg47 zGwf1^f!a0s_8uf(;2D;N&wdqCe$546>hV|Wz@uoHY^s2 z%QrtFnPB{20z*z;hT`BvCNlt!tq%N#fDC!buAKc4+-?1`2w3%grntj&#&%o>C{b=M zdFB8y@tH-rr3qiHjXUxsr4)Dv|7)0oNjqO*ekgBWHkj@cI#T}T3eU}gvu-SYRa0Z) z^hjW?pc1q3f&@{m02t_L+_Vx?~pn&njSsWKh0Hy*4nchY2(_m5$#3$mW^Lq88% zPiBL97j4+l?pm4FxU<1UYHKN}uIJsV_S_Ux{osc5?6kGX9}u`0jI1Qe(E#^2OxI?CubETSBanmcZg|wd*|Z&!^hLy=$Nx~)DW zhC+thx1(*jdB?jK-pGKQ#s2GDG2v5L;1W&r(2Rz}J?V!{559UZah-}Z{v`#Rj8O>*{FGDUzQ zU)%PgR)AY12kC4qZ(A?>^%%$SbA2>|KjC0S`T~kw2{yl^Pdv)N&kE+8aTRn&103Vzp#uEIC%P^E9NyCo4=8rD zh2wMZM_R(S7j;dFyH5;!5@)&D;g0%26(w}ykK$h-d81BDX4>pE66z>+)~h`Yfal6; zzD1Cl!82$o`LUsodZLfZ#xj-!;JAj`r014Z-0ea(0@YJEbX5q8>FfTWcBp>s60w=2 z(_qm2Lch#(e6>jq#vAhEzP4wz8x|eFK0s?Z7u*eG(!AHe22%8Y?U}-3KAxq6Kv6?c z`%B5l=oGA$mF0loM+?LR6(59ALPNNs`2|QTLT}J4Hw}$gYz#H`40yJ2k-QM96*h^a zCwI{TNp|tb(LWyqNOT9rr$lbzw=FscQP!1!on>K=Qa(en-!?lJhYY%*ocverKuGuP z+b0*lW;!?0J;YajbOJ~uX&*wdk8EQy6Hi1asMitacK$+I0mTd(13zo&rAwEVE~3^~{3hk-hz|SE@;8!&-bT7qi5x+Z1Ve_@S-EkW zA;JqDCkV*0vX&!^%Zs-I^lwj`CRyY~ZGKa3s&vp?g=t)%IaBX3gTa)=P6#oliqg zAKNZYXk`yNk*KQyve0R$u}5;ZH-x{xW4#4w5Mk*TmpBV7+xspA`{4tWLJR|718Jnf zy6fbDPv+MNXXpsY$gn|x_ZW2M9Vonj!AaqoX=Z6m-6&2b@a(zUrOdGBZCXgGk;etluu>x<+0sW<5At2~;K*dWtsaDdGVg0OyCF9c1fHu(m@S$Is+y_v2}V>m_s5iM@v?zGy&Bd zxHle|D)jM@;m?o=gmq+0a2kJl&|f3Zzt>>IdKzvmIQ}{lmR;U?myndK6U>6;WV}@i&a&#A%*sB?PLc2(=_Xnw-ER z?w%_m`CYji=_`A|*jKdY*du`>Zw%ZSybv1z4wFW;%8>UZV*Pf|kHimIBI@ByGHh(*7OAt07yIP@3}(YT(E{2pF_+HJwgu>cix z4U(h3GG39CFNk;HhAC-V?nt3H2*8Of(=UC=fh?9xZs0^g3KO#CT)cQOwZ9-OMf*xA z1Qa-r*WncFn6;rg9>J}Ob{f=jG-(I1_IN^qB>HjDd(8nR8irRh@Q&pB(if<7Bg`RZ zoY1YK78ppL0;?995P_TGDUilR2fNIJh7jP6{dj+v6ox1t*x-C_kYb92F9FXVodpWi zMjZEWy-t+0Kzc^`;eQw$`kw-m!qEX9vVcK|2fFU^X#1bS66kf*s?;CK(_gTe^`EoA z{+xpxoMF8!}!BQxBr5qJJls|tihfZ z12k=OLi>ZT`my{ON%S(m4*eI9@@G2C)%q}W2@Z(GXn~_XIk@ZEqyFl7Nk6|}tpM1r z*N3M4y|MoN>!54RKeJ!{{aZAt!r}k_crv@C@SMOu$a!6ZwM0JyO~n6GQc`jaPLx=_ 
ze9N&aOi^)Jb=lhkcP>a0y0dOqp>ZGZCCY7f@rweD`Fx|B!?+ZVWP~kn6+hhAT%ZDH z31TOp$Hlat8$J8zGZ;Lf`VcRH%t5Bwb!23uT>X`RyxEq*-`=j~V=p!v7qS3i6U7Td z(Ix77p+);-NF5D;QXm@Sv-rT>qq~~WT0(>fHb&cnOn5o$0g{RXcN`9cbl<@VhuB$| z7m@uQq2qvs@+bchKR;sRxOdKYbn*qlWZ^~0D5F(qfxh-7StB{{#P{qi+dXuK*{^gw z_dR>z!c6?cf(r4b%a<=J?|LbbkC9JEi(g@O6uy!*0b@Qzf`^WtV1RF&=POq(y?YeG zftlWYmN%>)2!QCpHr;!(sqC_G!iygZ8@@~DjA!+pj1fm=LOwUBG>D0Z-zzEGao~2B zS_l^^o$9&%R{QHJ56$oW{r(p&Y`z*d&#B`Cx;ilX#2DS=bdo@gzFGG{H2I{*+`|5& za{Liq6O~j189ioqMZ*Sw0oWNC_mLB~HH~DT@f3LnzC?B(f*a^dP|&Nre;V8Fu(xLb z-qI&SS{;n(ClDy{+S#>e^*mPNlbQ%L$V=?eyJS_0$cLL_VEYC;JN1@r zudhKu^#OOND2N4bx zrznUxJ>;?30>>}jXz9V-OtVp-61Rp`4)PWq)adBD<351C>!#sTg7S$2s}hzKDrNkd z!=q8KF1E!7P3B~Z&Acq@^|!1(gN>x4r$;?Fboc-H**X={1G@wFs`6otsfDIZ$j)Mu zhL(8V1li3&@c+GgRc5QU{ah0jB6k!eeWKPEq#L_mL+9wMrXT5x^ZXK*HddO_0@{nD z??=qX{)ZBfyo{C~8(j{a$hDq*g#%=_kGlg?wJkC ztC!7NlD_?TTRi%HUTVpUGsymDsd)>dTE^<22<>509wL%oKSNIrq^-K+@zPthy|={x!Al`}ZfP z@#uUZCcCVfC6#FXrKM1v`GBAZ0P3~-5;}-co6#&hlh&Gq6-naZ?IOa$Gyws@|0&+y zOITkpBC7L8k>*&1g$-0da0H=AF)9-l2AU4C2s?EAm*K=Q+D(t~)}<`Z4wdSoa778F zu^^!6kRqHwMrAMDg=?9VLDa$zCWLIRP;%2SD_jlQI46GN&x3Lyw_!w{G<6)}%dh$# zcM!K`)7TOitmTd3r*`5q06v~8e(VO~9L7v>0UMUe+W=W94R=A*PXvkc&!i;cc0pTG z;%J!L0pYZA>Qz`2;wZC1E(^PrVhm46I0k_EJhZQbr~&zam|YI}8(@q8kSXL_VwgJ@ zWeO}7NV9VL_H938Kwb5>&XQ`}w|HRfn^uLpRo`V;i?D%fkZ_V{6xj@gf07CVGZ)!Mkxd)nLv z@6(oc+duez7x41*Z2NNXD0HQyV8&|DAT(rv)OF|eqsQEif*Ufl@@hG-1>kSI4OgCwUcShnB{ zl2c7~&iK;uo@@wqr-FV6{3mokSc>%XNmG*;knO~7G^fi~C3vA96=r?2Xub-TE;d#S znQ`*%7dW`2-Xyayymgx0Qu|Ob%T|e;7D0BBHw)D7P(c+#{Z0;z*>9>(!fS+Jd1w{o%rs$3^3mk#h*9Ncyqjn*gwsL>oAB;@jig8C^g0IGFT?(9?(hm9dk zlS@J&@Q?WhHOIr4Gwt0tXf|jGtkrrZvR$7?$P_^yurbsq{H)Pp$Y;^KLeI~ikH&so zRfc!N!a#5?FnZ1WB3T_`{@O%UIEv!}F2Bazd}RK-!b$Jp;0323Z3y||K~X5L)IkkT za!SOzC)ja_2oNJ^;*zWZnl!SAZ?yO4x%KrFULWsI{9?krcw?}{un(NhYXK>OdN*l2 zkykh~U0VHxkj3w!-UJ<;k+X2=u2Jo!RCUodHk=8VkxqYn*^PQ>b8~Y?-H>X+^y~v3 zJ0;JQz@z3!!b<|{^?UT_k@vR)cOI0wFP_{CWg(EThFP~Rpqhl;&NeJEcR$ARVc?)R zM4Iq^e1_YPCM5(N`~jy{Y$ukopC4R7J0m+JT^%4MvJ9lATr7TiC|Ptud@Y651ZoBm 
z#P!>w`0)hJAfhskRj+o!`3h7msgeN*Lu^U&>eyUUQ&JM352P0(dn{0pD8@*1NJ=!q zExuowDN9pg(6SbO@ggUxLkbSydM>U!sDR@dM7?JygP&m+!ETlhFa!9_fH?F+VN1*_(1*`*J3#3H$7W&h|DsaIsw=y5FsTle zzgdAVPBx6eCe$0y7E<>%Dj)V`;w`u&;HAZb(HATX)OwiEGYQ^g)O>ukB%umhsHVW@ zDD^-Vjlal7^8n5CAf^fL_YQ;-K|`R}Y9OeGf%3+Jt0^gSQ2=0X`cs(?zGL_ErjhKj zo^0|k08YTpJni^`-A{rIa5Z7b@^Cp=f62kQJ`Gq)TVH=NP%*G~Pf_xd&m2|%Bw;}& zK|)6qx{q;W0QnLbiW=(-vBhXfHf$;c4-6s12|NMV)oDTs3n&$q!qOz-1UpCYiKnl? zxLrH{LAuYdkl@E5ot>n*_Z?{gKX!MYhEk@Y+Y%FbKowsdj5t`Z+2Z^49YbhGAp0I< zh3w(`=X7^>SGH`%rX*XR=u$UW5!0jtP`YGKuyW-}z%1mPxUaoRWY!ulL290d8=}^JibesE3nn;q~?t87-^be?58*Iv*w1&!7ECqq)rX?X9dW zsri__1|OB_$|#f|bN>i76Y9j6oF}z;xK5M6VcGVqV1o!sWYDa0 z5SCe)!LHXl)GT=fRtD)pn<0YyJPuQ488qa2c!M1sFjF;6U08Fp~kG z4GV*vL7ovfU&dLEYtehm15@hQ;NM(&SqBaFD^U9#PKQwvp*1Q-sD!I=rLsCLpgU0`qnB^|AD(S9J@}7zC$? zbuYhxh*ics5vV&pkVz7@5+%Fh;%7k>5pW3fou*3FMl6c*ghT=C7Mn)@+p06~3ije~ zmTpuZ3q-v`ISMQn$dzLBKK{-UBpK|puLO+z6ac4-cOnHw7)TPC0MP5>H%W00cMM#| zfwe=epFUB4Bb{@QB1q1(vGlaGV8em!kidkjsHlzRjSYkb5I{_ZCWYj?h{eQ=L)1ZC z)$k!qu?r~Fd+=xz8c8-?4ONDnF%6Kwbwd^-kXK+9YA~+G?LeRnbA2$hl)8|p=L&Pt zPC^GqT$Y+^Iix=yQ3sNI0Ei8JFYvg2TwYK)NdfgkUXWA~u?i;QEC32{jJ5yF@jyF2 z`fF*^lNH^bJvTV6mh(a&QGz$zRv!X=2*80>#6ikMNG$=FQ{l#4cm+%-0e&Dq6Krk* z8T`8O5IV4EG!G<7fywZcSS}IfP4*&zOYm8b~Cg) zURC~fH2=zD6yhktN&w!F4IzrBktw{?2nw(o=s_TyH^s1Z2e!*&xD_B;B2qrPmLGG8 z8Y_@nwO2*mvDDZW7UvE?XDPxl@c@XRJ<8B)e8J;-W&Y?KL!GFz$>35-fJcL!W|DFB z7>a{%i@GI!y|=sj`pZqm#^K(UFsw+%PfBdEh6$XY-#xs}ic@H~(&pCa!mPnp@6BIA zVeLsHD{7JD8_ipHQR01$YwRG9NQL?-+}x2}U0sH-dq}}q70Ub$FBNBG9Qc9giK;T$ z1M{C~)^Lpcv%Xyg7mGQE5i+B4NDU~@q~qHLL00T`dkElvU>K)SY0Fjg3@(621o(2C zM$9M({W_!w=|P28%-QQ;J^#@gVD+K^F!Q`|Lspjm0(uDcN+`fmfU1E(C&OlkhW}>b zpP_O4KbUxhA9JrG0bR>t@ccREGMgRz+R=9KbZBVQrPx>_76T~l7W^8OPx`ttd}L|) z^IuurG?Qi!Lb8A%P&moZLz{T5L&*zMuSRIqoZ(K@nKO0PatU^c?Ck95YstxFl~4-J zXP2;n+PdPchvDlxS4V#0F}yI_x*|H(MHfV59m-^UKeG3ll{mTQd(AX7{2*!QU9qhm z&6C1F{nPVbAZKAe2%JsQ?ij3af}CRFHUf2YhRV%;3>cNsUr7SlVbl)YowGILyi*6_ zzbUY|bRDS(4A`-{ZQU>u|7{qdO!2R{LJchY?QgU|D9O*Wh9hd*ei`oh`c)_HE_kWR 
zZWz#7I#VYeo*wA*wdW!uqu!ujOe(S16$Ap-7zuUSjd{P-GLT#j{zyh3Deu1it03*0 z+nfhu%``ncyYR65xfRDO;@&$sR&Ru#Fk7YjulEblssCrqHB6Z?Y*jE#4ympGgyPWL zW$Pu`rp@4pq6^nND=7g)IOIDy!z?vy3B1@2#6p1($r_IN1FQA#RM_4{xNYCDnZ3tx zjjCfyDvJS(qU6yTb7_g1n(e(Q2g5869#ax%nrTtFWdF^yS2HG}o;pRnAX6+`t^S6N zR51+TwcoW^q~Ufi7zXo|)klu4Jklp|Y_wk=fH*M_)Nz9151zIRSr%iZo+09pUC`q^ zKwXe1X@gKQhFZO57@MdDeG;IFOK&Z-eD)i8j=7p@)~fNlr%s=Kf_sXEQ6C#LyP1_M zwq%#Y%90qArla55u+|;--;XXstf(3J>bPjqr|MXM?-0%KY23HNpXwv_fS62pxnTzg zbfVJyE*Caud47e-lgGWQ7vJUY1rMW*Ta5+@1J>1AbhsUnfnjI>H)<>P%F{ygW$#z6 zUfeJ%9M3>tosje^yi-&u5@`R!ag4lBd2>Ai(t|1E*CU!-p5hLgCL+Sc7(&B5)b#7>us*cF z=x-CCN6vIgs?2wl6F?J7ez5pI6j2#=cdpuFikZ-BE z98B6$&LUswNXrD*EobuvO~&}(AAy2UcN&Sh;2E}B@0J?w8zO!QRHP}!Bpw4ZKA^M+ zIl~EHk!Y?80~n!j<7iuj5L1NmfVyT*sRmtb|`4hRf<=E}~YMWIWckd27^qPjcO5pniNtV?44p zyy>tW_QC`#owjZaIPqpa-O5$SQZmHonH5$W*X^}cwJhC*#P##+vc@PxGF;M(U&V-W z^a7qKQ=$tws6+0T*NWxK%TnvhMlT)2_zI0@QEX^^d<+B^O&kdkKJ)juAI$_O$%HRD z-~#EDPB$=(3Oi}bu^Uh(vSC$foU*<{?qcB_8Jgv>Z1KUc1ls4zKW>OYQbV8)a-Zpd zXrT>IEX*;I5uhe1=CeotZbMw7qopw*kmNG!Cjt*?i8IgDcl)-xi1<$QZv66U8+^DM zTRG~_Rl(V_uOzu%|Mh&*Pd6X8$|>MoQf>upF+HM*w-Qy+GRJ8xTbJoj({>P+=p5Jw zSb!)+gxaWije;_FJShEx22&TMB4KdE;0L+yt_vuh*b({@9>i^`cd9m_U%2AKKPBD% zp2nr*R<5Ck5tY~0*HEg69HpiA%|fVrQLV_vb&R5^i0UCypW;8I(L4ZpYtzLiTZh}` zLco<-H2@`GP$A7>1(GK3EvdSA@o}^yfN}>i_Sr5O*_%%9J9yoP1|)}fw?B(mqcrryAu!@Nvo?PrR)cN!z)HF4fQn5q^3DoAdrY_SE4EO$5x&B5SN zKg5OnDnQTJdjk1RhqCiXv!#DBF(ccmR|{@ixZWau(NS>^bx7fa7k9>qOx{jtr@ zfWN=Q`6abKV6`|H0pzxx$I5LSiP!Txl2ow!F=Oaqv0j0U1+Q1^* z%n!TqYA0|bg}Zr9Q^C`W|&(gad zEa27t=4<45>oB@Y>bxF7$?j2C@KvA9;;?5&s$0*I^3~7rp*PlfJ;z)Up+<6ahTX*- zfWNfEp$Z_xg&=Wc*1N~T1lEtdu|4Yg8yPt&VjiGTH@4hirRoL)3P z4B$J%vr`&&MX{eG3Mpzkv0*AF>O=u8;e-TXdlHaDR&;cfkqV`JKi8p(l!15lO7lO5 zVdXSBp7piwGT-y5j-g?sB*w+`iL~iR#Mmv1Jg+n@^ceQqyG=&d)AP}5$=I`ITg~i0 zm;Ncws%L47`BHH#9~bHQ@pv?ZXB-f#IPcu`^Dx>KfT{1pfN4zs;X+EF4gC$nK(1Ge zD#QK#A%}>j4#Pu}4G+EID?5KZZ~NTBbPrxIL(YyBf(|+QQ?31{8`4}SJ|xx zMvLQAGi)zZRSt9?EcaeucDTR+S(TK6_q{P2i`k7oo6ma+8dCDnEjiI7YpNGVc$favF$mTKQdqmLk9j#i54 
z!mK@vaV@>FEUWDOyLXrp%9fZuHXnH&p3Qc(>%coh&ckYmhHWtM%7Z?eDo!kdEa=|o zyV|;bnqi9?m&zJou&fgK%Y4ot*g#In9$xcc3}s@?N^FyX`i%|BVeFc(7s+hhg!^D5 zirZ6i)f*QGxbjwG1doz!zHwfGqNk*4uUkxYEC2}k%y~zB0Tr06nCE5-q=kh++l2e- z@%aR}3eCyt&A=(Eb(iTf={OoId(70SkxO zdJb0`e=Ge)cPL@><-WZ&?QUoNwtc#fLtd=XaOc|B+-O8QNWFKhz7}wdZ{XCwH}zO* z`H72Wdds40-mI9kKv9v=P}^y{m)N~b#bG!xRE)T3xbd(vO;M)w&M!5Wq2O2 z+>C{I>+UObj8F!M_8n+k>dG!Wr~JM8BQ5}*^XOyW4)oBl{=$^a;MHt{YSucEe}e0bc*i?f$8oOBZ^qUCP;GE|0NVmukJ}VQfA-|I zFbs&YbEVgvvmBFPiO^y6n=-;d->>fvZ!zm=;pqGAIBgkc2(RWo2O%Mst&{ogy|J8U zm3i<(=k?6Sl!NBo)vs3q=E%v;4%?J@wAxDO-SyqGEw)ZxZY&tSMl16BAC{AB0n<1G z_xs*FeY)>*zxT%FdXXM+9r7ZLg&kLQsV+_emy#{V!Jac79v9>H6#C zsF2`*vrgsK@6C2T>=1`j`vx8-3ZdHPu5zcOP^sS*ZKY#nMd>wZU}Ny#&^knxsJ=8@k3t-?Gwy!V^n(Uh6?M z58i-G{OOy?6E|HUWIbh$Dss_=kA8%tXgajyI*+$EXF2hJ0JH7<19(3a^@k)n0caQv zc*Ma!C&rN%_u~k(!rXzCc^|d+Sy@?G5%r|dZUNhoXTqAgE@oSn@j=h}B>ky~#)4SG z`iHmY0FcDz`J#8Bvioi+mO`lW#I@pI$k$%pvQvM5Mi^CntyqH45&k}c`Tz8e|JnGX zK@TA1jD^-A2(OnMxr;Ba8*)oY?(>o3H)$4jkV)i5yXz$E_Gj4OERNUwoK#|(#R92o z3^q<)O_05`o>nR-f>^h&V*V`}I;DDMtnmQC(kCE#5d6(Vm!N$`EsrB?0*F{a z`g$9;5$VK{`+--`vbMf#`!g|qNA=Y+0L~?Vp5h4vSuYHZjurwS4rxd*nDLGk9VwXK zLbMI-PY}<$>Tk}zk(ntOVGte_#fC$PL{X}GsslbC`NBbRPE-UDS!%yJaSE4fB-M1X z&nM^WTvuYX6)gjhmTOF7!=Fh=OKbOP-vpp7h?+pI&m-cBJgfdS^SAY<=xPHn0y6?h zBEo3~Jp2|=z|`4nc~}UO!8K#pTZBe zTz;T)8hm4>&6>pJ z+mo!&k$F@r!Y3Qf={`j&8CNZ2J?iFtP`PM0*vRaEa&?vU>+u4Zq}PX_p4J zXZX&aI}xsI!ywi)2^wy@VE5@^Z-F+FL=%kp#Z3d+lZ1n;f({YbfK=-^@ z7Z~QuBcKDkBB-VJL3c+gLkexdcQu7)Mm^M_K;<@&PLxfD)_>zo1|;k2DMe=nnsh z``QFhf3fN6vLMY#WV>C5$*brfF!~;3RkogH7yar3*~bHo)V5I zy628(W-hQj+OrhllLI%fLS5e=G2oz0aXY8P{KsP7rA zG7sMVA}^N2$VJ;qI2qFT8ojn2K(dB$p7X!He>f8{pM-|EAP&UIi(JRvxbOL0mhNv5 z3d>iml7OEw2YGJ>P*_$EW8=w!SlG*3M*><>vZ|Mc5fDsl9$!&7YxGfTNEGEiohEedpSpk?1k~9k>e_ke(Az6K0)x7X4@9M;e7wa$O{sGT z5Ux;)aX~aj3$gk(hM&>99zaJhIosj%F~(`~YeEcHozXjvL<9HZhgi3QMTb|=t?Q_T z`{M}?fj?p;jU8`a7#zaMw1C|fs;itAM%#gVClUe#&>+xE4~!C|8X+1WW%K>}%Way_ 
zdO?nt--UF|*8z|s0Tt|6$>M-s3190Brm-p76-?;l@&2l8`1tu5T6P6FO+nq7ce>C5+?~DBVS4VeVng8{j5#{AoHVg}Tz>&3*6z74_ zgk6zTqWfo{1;EruI9_GLEwuPp&ObdBPZQGO7VHZQG?iGfLIhv!3;#L}g;d0ZhVr5f zHufQ}0*h1IUG{9NDcM%8-&E2fYF>*WyA45&x@ZwhJO;5Q>axYjyW${Qxeb)iH&1& z6*wgFt;v72@7U-6`Ellk`Uhep>?VS7z$q<0EDvRssX{c!aZPxQ!R1rL5LC^-zcTEF z4dkF(+(x-3#6dv1i_m@`aI)l`6F+}jV$Cjyw?iRJmclCsBA9Uaee1~Pf|>0r~*D5>&a%VhqzZ}^uAE(;V6j33TZ%yqPa3iotJ zo2CcKo!W8p#NQYG`>UY8a)wfXpUixSd5>l}cKj_!|{his8tZ{RWgeLun z+^=eTytG_&bY@aX&C&9ogGJj18>jgXZT=MhG3^XJq#aNG8PJ1w%e@Pi&Jn~o7Uov| z05>J{NQOufGyc0}oXS6GMMaw3{2&LEtPc8gJhD;3-hbPTN?xhfTy)(Ufgt?Q?>4F` zCP6`|N&83ED(twI$!a9hTTR^Rchl3Nm)G4tIlE>j|J7MR=8GmB+L&o<@iBDH!u8X3 z*-bW0@E&_!{O_%A!cyMmKH4%qCayN}d3ydp#r(Ws@WDv{21rIlzj*`fQhg4pUww#V zxHcK-m=~lil`2?0E9hn4=o8L{*^e%Srt2pvKfEHAx{o&5UE|ZDH+;Qjb;r%oPgdFF zo6xmlZ;IZheeW!eY+0UA=wg)L(I^nbrPc~c*6?NBILxYz(!yz@LR-P%RucO<$t#siV`@_{^nWFPQ&cShLYH{HN?qdNXpq&Z2YG3LkQO z7yni@Tdz#gROIx?)Wh4}y+RiCMI>B?}j0`l5X8~-=dnCy4P{1S$yB4S1zmPOSALxb9at)#cLX5W|U+1{GH zd~oRS2RRBa=G*ELInI5{d=hd0x}NLd<=+$v?&tEgn2TGtr|z?t@UO^64$m;u+y+VQ zrCeb$phDC0bd0>>a?F}JF8F%)QtCoN+_wk1fjlr^F9sPsUe%j_92x1SGpfxmvl0K3 zP!3ZzkF_#QGnoQ=lguA?^^zXXwDYF6qEAU@)wBS%NnEHrqyBOa)v`Fv#Z65Q4Svgq zs5T!Vm7m}7jD~<`4=N!ygWMV-tqLlmR%l@P%oL0fDH$VMIEnO6E=rVn?MV7EZ zyh<-BM8*$c3_-0GsVO9E?Yl1+rd)(`3P6ZJIDPQIiz4rI7bNFPh4XN`7NPd~h*7;z z*OB8c(*60SKFBx8tPtk#qyR#Y#Prs9gWm@ZA6B}vDiHOz(NhCN8GM=VSG3I+W@tW> zoZuYtJibH3V5lZpxz<`nCEHDqLnKO zjo89UjIH{IfTp-=tsl^xR*srF0Si!vDm7W!q7lHlyB9e+nGJ(WB8%v>*SpaP zI-NjFc&-eo?cwI;)00yL?_it&0 zY^DaHIhusxHaNM*lVoIcT9Ge3gZu{}y}!}rmIH_pG;4Ym{GJ%EBvpZdB|bk<|6->u zLaWRpr=>1$O29MLz;J?E;vjA+#l}}YHhKiz7DBtTMS(b}nziZZT#HU9pP^_lGh&y7 zb@<`~;c0>iby4el6!?#vS74oX`jJu$o01lfz}W481dYIEm9;s2+O`RkPySiv&NZl} zKM^-p#(B-$Sf|UAzIxNsk6Mj3OYUSiCuSJVnZ#wjgB?aZz?NW#!~8`^ZahpXdH%d7 zr3q>`A#74|)PWw7=uI*l#oH=H?M*UZR1^Acv52se;0aKap{0}s1v|CzQY#j?U~1n@ zFq(W2gNAX}17M*!mP2|o(k#EXDG4XaZ9{E@3{(#nrGt}`MSKTgXr%Lc**~)md^q<0 zqzBel-QHg_#}lFA7H(qxG@R$hx6G|OGEAnTqt6dg(FBx#R8_!%>m+c}kIz+GN}$U_ 
z%y5gG2qhtDqYqKk(Zm=4*7)Oiyl=0{$-KhxKkniMfBon^3&@Y=fMQckCfifgm+ejU zK!K;^xaev~Z0AS?J2 zeNZh!tgO_D4>TG3_Bf&l!TyXN-KFqtq2~rfX;6<#*%5MN5`F^jL19g1<{u~+gU96_ z^(MD2JZ7v|z1V_8Wi&FA!LB!2%;BR2Boy$SZe!{qBH#&+MFNCHtr7cJWKJC^05n>0 zo!4gj;2Vf}1b`b1knZ(oADDET^uHbWdT;HInPYO?ue?IF%ynck%)b}in_cJy=9KPtZqR0rX_;=cyNp#ugg?g24tF9m_xta2YvXy8wzP99<#R4^3=(lytPkUJ zh$;OQ!@{{XCtSCE*(~+|!vHJkPlLw=zve~zg}gOS&S17y3YuqMNQ#YHbIH^4!iWYR zztB~yI*AR@p`D>@C25h<&P9Y=yir;s!WHt|Kr2*%|FBuoy4sc}gImshEwx4@;hu*; z4$M9XVt?RVDL*FfK_c!o*y)cmN%IxShIOwcT8%w=DuGAQlx2`11oGNQJl;b0CAtg_ zsH^UJ$)H(b!M@uc{+eOUU2%6R-zd;kqJ)UAZWW0B@_eg`CsloQcdYl>AkNHOUO z$eC1fYXt=5($#Xt%zipdN)lHusgrb*?$TVIab(2)S;m@u`)^rAWmQ^>Kk8a}@!kxD zqfmrX2LYl}X8`*%gw`3%2W-P@3(tmBh+x?>1P&$%l{lR-z^N(!^=F&l@t=b;T~B@y zO{;tv&o01n+sxoYe0Z^@r2nH7K|XHzn>s0<90C-^XMdJ1dM+S&MnQ5-cd<{N)$s$- zAqTQU4!k~H>YQ;S?)A3j%Iv0$5&_}y_yeZSOtk6o&jMW=8T$mWbWQdAc%D^`3+`4e z>(YJ^Asv-&D3|PKMuSi;?%5e@6OU+3ZQrlD_`>dnc)^q*o zjZ0iR|4zZFNjRe)Ny@?W;pAy7ywG8#w(o-mjA+*9+8#o&AWpnIE>$x65k13^DA#pf z=nX(w76yFu&+c1=)V@st2E)P^Lv1r!>zb69NVhTSw~yyGs%gRrirYea_!tZVQTg}- zjo1bt7QID!2|u=4WrHi-9DDcfVhE~!YD(ZFv0GdUU`;7FYdS|zZoQB3NHk_|k~#$< zHBHdmeGp*H>Jd+gn7VB zGmFoQ*iSfxnTX%(-Xglf>BONG`u@simqc@zoz?l+`Dj#nS;R>N#nrRVRBkj~l;kS3 zr9x(|u_V8Uk@W6qsfL<3UUw!Q(5AN%rLAuT_(X)CSZ#CuCU7dypDkk_03H^J>MyhJ z_28DP%|4kpG3W?tojgWs1MWrLo6n(P1V5pHY6Xz6k9XhW$u!>=-NXZ5??OL7ZURU{ zF~=uB>xb(ut3cFP3AjcHbp_+ep{`!2)!R}>@G)S&uI$$k9D!I@d^v1!UGz_U1J3WA zJ5EPC#ptlvKH%b34mHmB6+bW^**Lfa^1vhyP6Bd+E}5AL>Qc)-d@`=zx|Fw0N`Xb; zOq!~pCT>CQduQ(kb7uu`#AtV2n0;PFa=u2m$eew3YA=T0OK!bi)NIOnltW{GUQI`%oWS!V{~tGE{@ zr@M!KjpteHxV7bj`}$3c)Q_}jm$D8vUJA9EB;%qZvr};CsPVoPPk2_O-n$*;BsQYy zcItG%#+U>iJp=Z0f3_~TUMI?LHz!PM7iu@3S^rg9DoBc&9A`VYJI91qMPIpWjxvsx z&JB8OYmG0wT%z|hJ)^k%-CRK~pj#`14dYNY^8=HnCqVtI)H4C}^aScaAP!myWsB2{ zg)z9evoU)#chN~ zR@c-T!SU0-k(ExHG2>eO8+;$@WPx>K);MJ*4HkO?Sb3$RGSt#WO|QQ`0tbWrq#oSU zAV8I5P>}A`ZuO(Xydb`dsULdh&Qupnswu`tCyPgP!X)5wpKGGV-=-;j%Ajt&M|}BG z70vv;p9A((zFKOq>3rtY`)vZk!Z8xR>o_BCI5iZrb8`u%e0X7(uA^b+tB>7o)9HhX 
zOi^_vZ;AlRCj8^hD60wXL|jX#5M`G-YOfh1wBrcBU)iEs?ct&C7p0`17o|=6(p6QY zs~dgug*Tsyknc1t1&J1UW%n_~>GmVF@{8TSZA|1@zojhDRQ37e|1N_&TZ(G#>GE4h zB+5x?UWs&NJANZwU~taP4Y7TzrMWUp1yrU<6q)RpH1omi%T9;(o!{wGzUQN2E^vYw zE&6lh+|}3ET)G%<_1!_jUtty@{($WQA0sXQ$S^?9ZDNPRqQ#H>WC2@NvvnLf=2K&* z_FY^W?ZKN@z$O6$!DoVIpUA4k{e*%5p8CBrGc9X#2b?4Ft{p}FQ&Xcu_$yDFiOr7o zZIhKNQr1fEI$n_r0>7(SI`97d5Ae=da=#gwHx0~%XeJcH)^HR{aMSX^cm(}9ydY3? z0ML;%0nKMyw#NPmvq?NdHC)4+mAkc~V%WIPXKePqk+r-1PncDwXWp9f>-Ax|6B!B|n$sjMu)3!U=FF{2 z)vKD-FS*lR(|I3v=w+74Pt5F-owDbqI%%ZONC?nz^VEH=k)|s#S6j!BD~vTk{gG}? zpVo^Aao!F75gwk+X7;=E&xEh#Z3*Rc3=6y6Z)F`3w)1bHE~lC>Ps(hXF8|3c^O7`y zyJvQUyXFOc{fH=KzODQbM_X!Sm8^b|#P^4%-|=l|n6s*4kHqu=K8_6CDTj38w3D;? zq&<#WEl+shb4AO|PENC8;m&MvmV#9Bk$p z#Z#nHAN?*wR^ioy9KS4Fk_c(caJ-G;d)pNkbFurkvtRpOr^zu;0+7ZQeHmVr)O~tx zaKVL2p0QCpK>oJWcWms!dU6&BQmH<^ocWx$#8g}xmq5LZ@$n)wiwR=%KB#VK-YS?t zjONh=L!$;I{UeRl{ExzK&WH$KD78%1#w{w~=N>belAUo#I4z1~*^R#%?Y){5ga%g3uapU3j2_^h zFFznGUQv5u_?q&9=#^XJCFfU#i})R}ERyV%SFRlpUMh7oJTcL9$C-eLG{u-dGOX^C zXJj4QV0KdX5+4U6HQi$QcV%|}=BGa_*O)kmi>MfA@5xE$AJpPlF=uB~Vz_@&9mfXu zt@q1%4oV9zuXO#WSjjEw$Uaq}+*Q(dZCR19sb;{Rn+q0=e|(|Un*`L&fc^W~O%{T@ zGR8p|Lmmiog-} zmoavy1v9g>*cwvRj_@y37F>?}@rY_>grbu`)!vR1XX-!Q>9=}kq$K6_w)|y%3;$Tx zD&dWFfC&hbC3Znc57ANtK z#-b@hY9rP2(s~m89%*r};=$GtcbNP}#b?yl34E z73EF7_2=(L9heD#N%WEHs#1>5-bR&iA)^%>0%^tkY%JtVfXFf-Z$kM7wLP`@lfoI0 z6In|EzpHosUZXEgzHl_7f@UzGUxAjK?8-56l{#-bx>Dh(LId7Wflb5C!LHGkikK)6 zVKwz4PW>!EmnUdPgJ_wcDxe%x(~+eSY;Q@`B^G0kHr}65o-hpjQHWKaGN%<#pGj)i z{wKMRV|C8KjBu0!A0P{PxoZi5u-Hc{rar}nyyPCg^8h`suYDaqhjNkC!8r6Q?Y+EGWjDedgLA7;~#L@Ok&rh z$?_sj-F)^z<}bw}G*%6vlwb>MXi~hpF}moq;oP+f{4WdrrGNce^`YvC;Me>Z6mCnB zGj!fsR(x<)(*Ln?c$d}dOa4<8UbAWXrL6OF;<8OQadrU{s{g+Idwp6gaoj<6}&AT{UJ|r=2{`qrwAQd2d$@EBfs8k^G}wJp@F1E8-(mLlCYBQI9iaMo>g} zC2(r^?s|zts?F0ID4E`50cdNIjw(hNeCX^9fmsy|O2D{df1Skf0Hil-sBavd7{$n6 zVNh!WFmxNj1lWg26Q2$zU=4^*zz|BH^@BKy+*v1>-?Wz%>y7;!0!?2}<~OmFJ|z_y zEn3nh93dg8e_vx2d%z*fZFw9{U$P?21F9mp1S4;l@MLZDiNC@vS?V6dc77mhJi$#@ 
zGMW>Rt!8}g`m5S~(<9Vkbo{Ec{H7T07355@s^d{iPG1`1x?&P9$0WW>H<-YWCX$2a zce-vpf9=~^vy%p9>HSL@emeAs>`1$4TK6Ty#0lE{i?W)(m#?og%QN^jC0k3m%>*v-j9NO_AtTP4||2Ug+|F88VfBqYwRj?ZpC5t9X*Ml>vQB6s!ZZjw+nfCH>!4Vwa;4niwQpk+lS`rUbz{|C%ICZWl@GqiKC3;`j|c` z`)QemI+ye;ty9FFZ`#I{dPBnhSdoNFBin<_n>LaWu@B#wYA@KmPvN6%j((i3&VX5R zh~eAvOcRTbfmauh}xG!d(b)QCfL`Y{F$U&fDkl&m@gM9?#-{{NuGwzw4bJE!1h+#K-kTRA$GR@0E)~ z%Wa#k)kU3hR$Q*4-EZ_-j{jW5QVXp)(!!2v8?;WzzJJ%&_0~Yp@|C0``xM@FG2tU` zXPg({@wl*6b}c)X)yg_uqu@LtS&4t~)6p*f&a) zAHVFfJ{EaO#PxcS>88AI94hAxrKJ{aJ))tiqKV-5>7Mk=WS8*pYkIro?=N=4@#=u!D$5Wm6AGx_f&rs)!NO_S};!Cki ziJe77x;rMP-YDI~D`LC|qz{o~Ar+bK(*^B?6U>MEeSh@E94ORj3j#J)`NV{Cit4xH zV_4TkLk5C7ay{k?&5U1~7$VblI)d|4*4a(*K9?8li+43LJ)dy$I!l?%kwp_9^1#`< zn0~dm-YlyB11BUEn-(j%9C=&rezwz*ZR#qKOPjJ6zRa6p%Chi{%6-8-2iF8~E#9TS z%l_hHiezoDzRj^o?^TM#)8;g*l$UE~qP`Y3&>?Yh(9 zKBoeI-iX)44JG>Bcdwnj088G~FS8Zx0(oZPrkeKnbOg_=yFq+699aXoS6e_H5MeGA zf2&Y>#?`QegKCIJ=MCZ`9ZnPniF`KYpW(hVd4QMGbgquGs7i9Wv0-HR!nGWZU;LKL zPFylO{LxQ7=amxj4{oQYNCcl-?UE6=T5s{b^9{wid#l4)qUHY~)=eIm{jFBJ)I#J` zaalZb*{|DJieCY0+4pIWEs2(`mt$|xU-)u^|FpOTN5aEzp2*lU#VXu$vt5|{w5(uG zWXY2z$5gs1%7iO#LE&aD_fK(?aBc6dZkOZ7hK$A!>}}^a0Ol4y9uDL)=`BDfSiIlws;9@GAoy0?+PbE|-9c>{@Q7={Ifv zr$ReIjLbe-?iSrv8x>--SyIjB%;y9A=_0JP(E_`cn7$GZjbJ-7>t&gw<6Goo!_Kld zgapcjOhaY}$O#F^$R~%($r#Hg8(0`jbCU!NW)kl#8&Vj#@z3 zJ$cdgkz<#Sh3}3WyLc+dylmN!W{wHB`w5<`?1rs|4v(isqN}Qc*C>YVZqoyY*6bsD z@@M?04NnQa!izPpxmBS3V5+pR(cFxF>%VC74jHMH()^1uba3$^xBPnrXFugC_gkt` z$XjE;WV0GuY8lUGqP2d^-p$U|b6DF|)N}2e98KMr9a2XxYKocOz8hZj|FQPw@mQ_x z`|yg$(8QJ@C1fZSqLfO>kZ3YCm=y|0q zqV|5ipZCw#=V|Z#ly3LE*0rwdJdg7j%*BpeR7)JLQ#-5@AQXQWZTjdF?f;#RHrx8& zP1)N0>oj`g#R_XBv%?Klzl3q#W?@tIW6w+J@Cvn);(fh% z?fS|?r5-oE${x$JwrD2vbc-C3+;(hud-zfhxeU|L{6Dmh2)mYrb9D8+r}9Lfi>O^R zD@xf|XR2xCxIV7AGw!fWzT3pQoLf61%ONNzB#wXa(rD*@mdBKeuTbVn*v|g>lJV8V zhe9f*=R{xO?6v7p6XS`RJsgieWD` zdJ^>FoVb|r67bPKhI1xr)V)VG!Y&={5>+nF3m@(|Kjq?G`mD22;u+V>5;U}TO1XsJ zJzQ;X_E~HycLKB1LFhvFIh|%n@oSgFl+};cKIY-vCWfO((01*}k;n%w#!I)l5{+rt zCm**+Cq$LqO;p%sdzk9 
z*G%2ycgA^ynk0lRo#8f3gx_X(vaPs`kiDLtc7H|F+Hns-dI`-6mIJq8N%;f;6GjIR za_Yt>7i7d7o*upM2E~naz~oCwr>_Rw*xD|Cc09`k>wD2nEEzLRT|z8R{Z6}fb3mP4 zvPexPAODLz{Y@<5^K3===ema;PP9J}BsWD%Y|)Bgd9?#tU6MQ6!{(qr-Tble-QShC zS;UW4B4AEy`?_DBx%X9WI+R(Nwq*5U4MUNLz~xcL;(N9Y_dF=G8lQ|?@@??VdqLgU z{YUQeX!3k(4}adqtH8^X1V*SJhwvj?iHLKjjxggQtd*R@YHUu}UYS|YsF|;t^<2-k zP|rTZFg#4iB|wN5)pgv$=epA$PQ>Ixc;=c{%RJM+_1m|Whq&2{ZVsp#ZcGb;X)`!T+$sleHn-$3=<7GF#y#9%6G{RmrEQlySKF|`2 zmi9AtQf&EYU-#|1+Fw@LkhcEtyEPg`ao6c0f7j&G+;!?l#E-44C+0M3{5nzlCQq;} zOo2bu;vBPKE~gNqDjB_@Vh(%sYys=T7rZt6bQFY3#wTPvlT_S}s{P@_uw#kh{^{dH z!8TKzVlefSqrvsxQU9=Vwmd%>=@x!$%3{wgPPVr_z~Bi02TKsrO*QyIP_ijJd82j@ z`==wyd7_YlM5U%z^4+_O)?Z#Z6LDBX13-H5M6lQ~UVaZQS3aO~=Yg+QMEuTl1dK$( z3?^FO_VWGhM?oG%Eai04BH-Jgt>wbBHa;;F|=nNv5l137qHj9fl0i{V%}=$4~2X;De-#xv=g7CKnS^(7B) zt(>J1B))Hu?@pi1gIT}5kE`B0@Zz}lPKzz6T{nkr&&rB_QgJ^@tKneJ&MTK|9jE^u z`o8pWt+zwZrO40wt-G{_4<6JzU*FX%+57Qk&VvXI!2&&dR_TKuOW0%qJ-$>H*>bu@ z?Ir3ppJvogHQa8C$dil-%RpwfYrdkXv_g?2Z zOTqA2#xx$iFzper)E7j-5H88Pz3Bx#_l!iJ%znO6V_MCVqvxeE{C=EljrLF5I7`X?BTG|{=lm3~O&9RAFcm#i#a1aGpcnSO`9QsZX|LQ$2f04`o}UYA z?@QHuKb^yIsnY*YVd?RqqMdpgUzXi2-z+*Mc%KeW^wFHRT(T~nOKmnBZ=8Ss1s7Ve z3R&0FRcf+LHYzf!f_ZIAbdN`iv9nsS|K^#lvooq->q%aP=VoeQUn7z(B<#Y#+Z{n| z4CSoZraW4z5Iqrd@QmtM{kf3590L=nDK&3M4cBDG427=Ab~X2s3tkbS8-f*1%q?i~ z0*lJH;W@_+(kc=@kwqmGLqr0CMWY;;-@7U@M~bPS_>Br$vi4)+^K+^=86Vhjd{7qGr0mor{ zgiQYgZ=zWOGAnzqcSl{f0jQ{crJD{=h5=Y|9exO$IpNW~Xm3OCFi z`mQ2%I{$tbEny2`L$^~8>(=;IzvSQb-oa1vVr^Qo#p0eljU~9LId2Pl1YILlHG1^R z`6BjS8H!xmuuEj)crX8%y;98rd3pmkeu}RD+J3C*OXsUMv7E-rZ#7kfK@^Ji8g_5zh*AXPWxx2gSpU~YQqBG&@TfCo<_Sry+4t|>b2Kv z>!b_Q%~>a~|FrC0UOy0%lABYqCEc9qOI=D)!z?W1~B(XD?Z|b?t_3 zkz!->dkyoxMWkFvn1d&R5aq@KKN9r8BvK4oT5RkLvt-HrNGDH7c*sKn`~>oh18PWd zPc)H+N7`e|9!M>b;|N_3xy%j>4pOckL|P+BjvzCTo`^;U*Iz$Zj)wY{18Pd($}u2h z;m5w}jKf(C_8y<+^NY&`U}H>^cDV9#^?1SBunDL&+p?k4$N^nNUULE>b0>q|LSvt| z_KV?gLH2Q&?l#l0gOdyANia^UPwhVdOs6487$Ecnwa_U!AgWPJ2jo51K>N(>gZzqw zKqIk_u>OOH3DP$nSlO=zs7iz}OE5z-?BO}23G^Kg{1LFJ-j{E&(2UiI=&2A|b!Y{g 
zwnOl=5IGT)sYm3}2Z6ij{Q2usALB~E)|3sN_Wl${q~yp2DE0yb$Ivy-CF4_2KGekz zhAE#{+&LL>sRY&&3zAV#=UVmaD=Tr?LGj6l^;+r;u)a+PY!@~MZ^3<;Wi{&>eEkcV=@8196=7Pao{~0G;E6(TK`#i{R zt{*6ow5e{W=aK+xak7c*lkS60yf(X3hW!?tR6hF%BN(sVf8nFYkYZ$MW*(dLKTiBV zvg7Sq*xion{77A^VRJTZwqnTe`!u~b?^=r9n45p_9w|N6YjVzJ2-JQBsW2GhVz4ko z-4=NA`FrTN~UEno0hSdV>5+A7o{9+%o~%jikv`fvM0$YCtE8 zZ9|0gU{KAmFm%LG^x?tp62zs#^dsl)4TSF^c8d$9hS1V+fgR;Xo8i38H|KoqPJ{9& z-cW{O*rAMLt51>G#VUk3roVo|{IBs-*m@faSVu_q49!7^@WDXvYWewH8gbZY&>{RA z{w=H|m0pW+@c9R;PrWG9SKCda+NBAI<|0iTJp~L;b6$tbq0U+XYaF z`=JJZju98(*cKVP`k^}JOxZ?bmEp?I&h*6N26M+huluRDn+LK+Rz z2Hv%w%!}*Cmq$%ulCtI(Hu;m!WOKN?a+Z`+TcT# zrpuv%$DDF1rM6%`vj52V9gbk*LjP9wyB5a^cAR_p1-@YSSYh(P_+YNVpBT^_{`+Ml zl=~fKd=q@c)B`@g4(>u!d2%e20R3uwN}b@HtBM)K@4J$^H53_wNtE#p_{F0Pl~;$mC7;3kR{sk)zs-~r*qjRbFk4VW{$7tHi2NA|Sb~+~2 zdA>p`yK+CM9S*e|C#M>@&Lw&}(s zE;9uT$-=^o0VR)W4IBl+0ds9g;&)yfpdX9@M0|Dt57UUlnejn2tL4`3d4S}{xOMwF zY%n4mCcW~XhCR+bk%+jV$L73+9o6f0zIh9_e$EPl*HiL^7& z=r?6#Zi%QYH(7E|g%%tVzB2fIQjU_-a`n4c1HcMvG5(0Hxf0Uf*y2}+YBP1j39N`9P6czpJKeplAGPXs$sJV=r>{={O**$?#=*=&)C7)+0KlN1}=ZLF$Omdu9dV|KI4fhON;)cLJnrCP;~V5 zmku5i90yTZ?sEjC14|3K{Hygvy`LdtJ8pU&Ib7sdfp(TOl5~Y$-E%a7cSTFpPHc-5 zvJ4TsB`iDF^lo{z?|xqt_j&fpmpoidgiOmNbq7e&(f`hUn|tKp?~R?Eon>SKjW`K1 zH^n|Rj)CRra_CM$(@u5ECd!b4`ji0QDaKVNjh@xiEN|EWdsF)qB|N{3s>jdc%Dn0O zX~_@#XuWKaSqgV(sJdB7R-_CM<_)VOWb{&pw3Jjk3ukrfUHnD%$^|7ob4D*7821ke zW?9NO%e3nV z1!CZ*8UhfagM~BTh6BT#Si{P|z<5W>T7BP@=C|ou2G{MOi)R<`vgUvY0?Rz1(ms7O~*Yu zsO@vz?UU*?sQ$2$U;L?`p}6Im8kr~$iKp?pY9-U(jopzDm&&SGn-9X>!sGZ6h zD{J>DXTDSl!Y&%tEU%}>_Y-g-thkh6H?RmJ32!DhmVQi9vj4_j^b{K#Ydel?6@A zv-H|mU3?6x!|~G6(ljf_#ih&)a=HO|iGme8I_yt+)8d>wS@!JvIUJ5N7Z~5oM2 z-4}SGuNcZKL^in>j4(6K3NH(+tE<~zYWK30rDU*S3N9vn>FsU9@b~K8N@uaQ(6XBE zz#E^Q!~_azsgQ=tBE^|;#TLuNkH`7{^E2z72AS%n>T3pAi3ruWtzrD*osQA`?e@Y8V;&PKI4CNN&e`Wgt-L=qXE#axU z3EvOt9gK$M1*3{M)ShZhy+^0S)M>AQlH3v*UH&2&sxlGcu_Q}kepGN9vL z27WCPuppk#F2q+9s20ISfwA6LPWn?&dm3RX@gm(Etv2~(kkSLBWF#t9n#cx=Z@jeP 
z7&@_Y1D|>*v<5N}G~2~+>~N4@?>yM&NI5Y5clv(8W_!WuGZqskz};-II3ir&7**?nIy$v$GJf5?}^_`>b0zKU9cR@I?4QgwN002AQJ4nG9KkcgT&NVeNBSt9N zlB=j75PKmA&l;HVhN45E2#~KmJsQ9j;7&z4Hh3fp(Qysh6wXCU2Zy8@9EHm%T5=o% z;__FVO~NZ41ILR2{hrNZWj#zbLT0TJL8hNNevttIunlqeWm1>~04P|Le}kB_HRB`< zC`6$TQMJ9&43|fe2_PSn#b50RS$5%cW=o8dhcX4pcos~7u4#F0A z6_V79mr*GZDR-Uk5h=k%j;NoCtwDfA+x_=hkPc1wiK>LX8V^uXa+Z1` z#EwKkhhBi6oH}3?P7&_V4dugk-VEaj-Q$U@IaqiGa0C}QT&^!hfu?(5MxIOlb+Ab}2h?;@L z;KWly7eSOMlEpz)&Xy8;3+-kQazHF;7lEtL%S$R0rbu!HsD%_@1Ukt{nuCS3ZunHJ z@+-{zMiK_$l?jhUK0s`RAk<#bPav8noq)Kl?yVQyf?Eb|-$4`w90<~!>XyOZhn>~| zx*y#P7aNiQ?9}LNquy?`u9t-D~o{LHt`2z+1|Jr&OCW3UL+<#xp z%&@ImVyb*q))EPpRTWbBR+)JNk;3%CqhAp+TFoiV?hhyB+&h%A;A`-HBgGL3vxfiw zzPssj+xI>9zNMb}Len;IipsKcaU+kK%%ZX3>c}UGf4g;PV2Xy#qqg)D-RLE9W0maC&u9`G>-(UF%P%@_XVFjxqW6cU7wj$sdQaOcwlShxzURvBlF*PS8}u2 z=*d7|lT(fo&XCat=ek&# z#Uy4^M|%3+9xaDrkFlG&-v7vmZw-BHNOicuUbq>wV5(SqHD=*ajpc7bjOe3=C6vHl zCAuPRaP|u`wIlOCf3Ujk&FSj{4D{8F=y`l+kgjDM>I`ywGwSgrcQK`Y=Ed~+R z(n{}^)d#>v@Zvq6WF{L~SXVa_VAo>+<+`bt>`phr-GGCeyAV|df=aGA^nd#X|CsR_ zde1#~%pNhF37A+gg5R81dSN{XTE@s%%y*kNcg{SB{<${Q@w%ty0~139gArl8wGNUp z=d43^3NS@oUENC^zn4OG~;JZYa``tYaA~F$A;}1FR-Yri?16yEo zcXe~pH!^I3`4!&Vyyh^WRMFCrTR(F9^;SGvoBvvCu(vwVWOD&ix%tmEJuzECH`)zZ z!6hxeV>ov07e35?7P9y>mV}^t-?ajzTH&J!r3?w zm@N4~V)r{+;<*?M%#SsJS)hx(aX8;xC+xzx>fXr1CAUYa)iBz8<0l@vb-UXT#r~8}>B-10-zie++!| z&H|1gP#r+720T3BWH}jWo#YcpHvurv3o~&Zy<^MHf1TA=Wd7EnqrGSP^qOhItzPs* z8M=`7z^19=F=GCd?jI1KHg!DCeNh}egcGC4xr(VzR~K0rwkNN`mBtgjE1VmR7=sgTw$B{moCE}e{z#y^^~SjNlyaFY4?i*LCuZ|<}oeVXGR(BR(~K7!;FjY9M) zvb<2+JOY7v;qG_xFe6mEZ;m1iW+3AjqeR03fIj;z(6uoPo-wlMBH!C3doG!;g2fgE z%6)KA#<{ucbhVu6XgaLRvCu`jr&?p;4|LU^K#xgl30m5KtgPiE*2Iz7grZBTZPfoL z{w}BoTQm2~Wh|?1+rVt0e8J%7JEb&*qSP_W4i&Z>gArhRWxVduxK-s2gC%#kFSHyO zTm8Y@((hwj90@zFOq~UTi%I@!;~3JQ)4Iiiq%=Op2hSf|v=|)7ny?UYIU#$c7s}@5 zznthdaB#Ed5XP@O^7Dduy{=KBUP3N2MpYBc@ktp7;AIq{{H@ApkPx) zL@qgG21b5EA|ws6;nZfD?al+Y4(61yqXsG)UC_s`IfMiS`G|64R3{lGj07LTVMo#9 za%>%Xk|gfgjE|vA5k5YB3^;K;_wuKW2JSz8fQy7j^pVp|QylXEWF9LAEGFA6@>w-= 
zV7rI@IF=jk{Q-uJmbtVjTVTs%n6elmTj-E7s!=VUz^^6{4A!vmA|u=v1sJfbCd}5! zf`uG8Ktf;}agdy~G6YcHRrn*b)yIuF6AUWD5CIO%k5L_uj4@I2rsnh+AL-x1kDU{G z+ymCfV8s);PnmwahDK34N-M?(;4Hmgls6@A76;jy0QFNY8!&x=+fPsP%$l_a-dGcx ze&`QfSpP!+WBK0d%&Hu&O2c^?ar~XV$6L9D{QmRn+RY1cFp|(uxcjO`k)9g!S$*%S zY+h<1L(ja?yXf)b7(|GXTPuD#97qysDzQ-TV@{H79kwv!cRC&E@4(#1+4v|d3!5S3 zd2F&L#~+-11bigq56*YO7uvywq%8>;+`%0BHBXr4DSM z)3Gj9VEH)Z|^M>TlU${*KNYwQ+HKhra;fZtr`-nwx$ zE9Z@0IinV8MiPKsm%K}vsa$~bMB>@pyJLnBPXmii_BAFfTbWQF4CSwg6<Dh2}5~;p+P#=@Ov!Ocuth9ytnG`P_4B$0vuTwD1TO&o%i== z^AT`+_n`-~sx!TpBa!01y=!du%Epex#vp}J(Dc%U3lkfQO7`M`cyaNq^gyq6ym?K> zfyHH2ZARUEBMVjX#`A_7ULfL74}jf!XkFzX^x|Yih)L>u{0@{w%c-5j^EeSu4{(9i zn84R7p%bQV8ADof1tJ^?bQXB_lMfb6G|+eRHx%d{du{${OM)SLWL8E7+~3SH;UII- z;RmL^;~3Beq>|de%VNyE+}W&eT(?#b!vBj*ssNIXy>?ng!>>!+V<-Gl^r4KL?-yme znj|=yZeNTVoKKJMTkr2=-KL`WOCu$x@4qGIY26#FoG?niJSZc_x@!3#{D#wR`ZwjtaQUb(N(1)ZaidAC+v6!e-!c#{UDuRo4O%0ebD-@S_9D{RbAq+@BK|yV*!_dTF-2(-jKM_}S3VZ{>Cs{FH zrpdei{-2AmC_00C&P_VT2yuWh2nx&}ua!FyQsCdyvyW&h_&_hxe__5~-l>>{g>JIv z-~m|sA`P(^lRK(h&YsH>_Z@{41TE5##&Gl&9ka8*;5EDPKW9tz>J zM6Sb4K8lUBI^7`FZGkA@!K3$vqX7`A|=5RBu#3HS`s zlGmU)0I7&_8EbRBP4@hz;tPL_)FZu1g|m_9#fj$vK5{-6FV4h60cRzKA#dKC9PboI zDrRt(EpcnuGLK&c`-r_}?0AD7v^lnT#DXt!SmhW5+YrmC(^68lkiI_s{*Vk@`(A(kJo}8OftwqsD=9 zDS@Y;^nHwS3QlvqYF!qC(du@?W|*I|mRyiIHD}cdi>LQGo=o{#|3Y)>x90b)qJbV; zO(jocBQtv|ck@X52+asZ0q+r}^%rZ2NX zj>WHs(h^7!P);~#dUc{0+x~5ZEQtHz+Z43Fgf+%D>I}}D$b?+d4-JmQ=!6KB=vAV! 
z`0$MEA>gutMTR9?whZht%R@?>bs`{xa z2vuV!h!X(oI7VsrJMfK`xO;lCG9oMAZfdcYl$IucDDY5U`*VNp>_qgK4`A{YNMi?( z{UZJ1fy3a@Dn3aYsU}8C3Y&(NwI?+8pEIu!s4qCR?rE)78Ql$Q7Mt&>`LKG$u_0s% zcgPziHzv&m#s{myMr>XL{{k-Sy_W&SC4)_s-J=t8vXOjOoYtmCh_*`OS^U*_kOH(r z|8>xezq#zPyRheWxlif)4|q)5^RE0)lkH^V^b|PZq(kWErZBpAlFZH{^Ty}7@^o}e z+ziGyt|{J5I2aj9!OiKz`S)o-sT2INDNPPLfhBALZ@(vuWbBx8P+0W0P2$~=bU<;m zpEsNyo}sE{<=t1cb0jmM2|L)Ebd){Qu}ucT>=--pjr%X)DQLB4&q?gdBW~$^2;x9= z={Lp)u=iQj-aqRW%d&FMrb6c>3v^N)?`rLYF#S)-#y(Bz6vedD2#g#_(RmTC1~~UM z?4XD#Kp@jJh{JpD?L}%fHX-=NzshpkL%l90>qp>>4{`d5(v^b~0cqGOi~RXy=)px5 zRQR9Gn)TWt$-!)zx6G0yW1r0!;EJj?Y@~R|Qvji=wC!SVCTIFw_}(qr<1}#cJ!*EU zL5Rpgpr)aW-hC%C>dsNf$~Mg&JwfQF$(Vgqd3R*d3H|;%%(yc-p6W7&QG-pirOuhs zB6SM*3NkLb&E(MGlIXQ_ImMIocgb1X{Cj;`40hG-pK&4&HX(5?iUT$7=f@zb4T5@q zz!&_eYq9p>QG=FQ=I75m=ijkn*`M!n{2z$wTC)2uY4M0O5680|BG>wy0O zpPuI2opUpESz=|yyL-#(57_S>>yQ@#JbSug^;EoZgYCb6W9~yI9C{0ymEs9)0=2Uk z^b=Usac5tC_39N%xderX!z11aY7ZpSUhX<%(Pva2(UY-5 z23G-F3nbegWf6}(@+BiWQd&9c9&D3SVUt#L8)t1?GxTJTrRSN( zszX5u>h){h`Z*hZ`(6-YT? 
z>_^q`FY%X>LvfW#aBqBjw|VD!`m^g0y~=!(SDPj`>&%xy@J3EOYmutkX-=8;2;hEx zNky0faVX~_hJXh^aO8v!(ogtW6-FL-751*S^rGV89qRoK*M=7^Q9Q}P`Xn>C5#wy){=-DCiJIY1Qe96OL2wiu8cE8Pf-Ak2CT>pVie4W8res@k(&nUvNL>N!Aq?IQ%m||7tytiSHK`(Q zrsOr8TWEFs;l)pN4Fn>Bro)(2P_HAcHY)b*!n(9i#5p zKzsv*=T1rcR;8VD@9gCLWseIQMh`PRyJAe3jLaW${5W>}xH>i+s@cd52&1kCFdif} zEQWTUD!4f~R55)Wu~rsb;eP8&60tlPjP6h`Q?d#se{VXZQ7wbo>1QfijNIaNUl{Hy zNJM3b+Rbk=3%KzbnC23<6-yMqCqjTJc!4xB%qbJXuuNsd=zh|d4isn(5Q(_!2-;868lq;Rmnp=Wp-%TI&X0F_hgY3L#12>ie#PV=L0^WZ zFrQVNt)$MXS9xQqb=0Twe|$xw(iBGa@)y=0WqowaiA9=^Lx_CyZvs33yXDqis3=r% zl#>4@g()y26jRV1@P;vo4|PiPyFkM?TB&3{dgyoW7EnpZ{hu;i2>nB*Jc{qgw@uU5 z;-F(1arKLC57YD)QO;OhBSD{18_qEdmfb=W_{3pCt^W|cJi{R6VKpu)*>sSTW_^9V z#~=W`nLYvqYzzu#S{Eps$yO!D1~v|=00b5yNt@0}RIe2^pt=nbR0i8-5j+za?Ldlr zFe9WcAC6Mv7<9vdfI_Ab91eHR-W%hEu=SHuBMEqbQ8Q7hkmw6Q^*5sWD=dKaE8jmyand0B&@a0fPMs2$ zP|UOrdj5SvVjatNI;*rCF|Qf?vgc}IOiu{^$eoLY$#lG>jK1JF2DKag_&UuUqKaMw zOOp!_<|6UGfBg8-1hT6;8W=R-lSU|DSf67tOy~on2RMRX0s~>b{`%qS_pIiw-L)l} z{(bXOb=PbJFTx+*z?cK_q)oF1mP3$>N!sc!GK!a#UxM*Z><9O0D(vGOr~ z(F}?7W$uUJX-v5>AQ(`bDSaA()a3Gh`2ydXPS_E8N0++TJsV@9ltawT1=5!9`*g)^ zp?MMxe75SmK40&bR~QEKesV~Am*FZ6T1dkAmCZT*^|Ywc4*mAPqpAlJd?3%)mH87jqb z$0Xb?C5`UQi?bu4BUIUf1&XQ6YH`;XLp1ds03vQM+3V;xpW6T z^Yo<Uh@F+GTg#7AKrUI+^JZ1c1zvG! 
zv^8sEUz>IYyyS753Bt`|fb8A1JB^Lc)t8bYMrz{)=x9PBD`H~3Hv+w~bIwYPeik^NDrzF3E9eYy%V7X{bGGcxvq<9rIT-f=sC&qe4FHe9=}p@T%g zgc=6}f~`mTc0$&|;M6$S=26{3RKm>{0QRgysMyUVf!V%%tn3ph*q&}S6+OV7fXVx+ zT~)2WF;(FHAsvdw`gW+Ve8R00P-bOeZrr0^zEzX8!c-m;WSZE!h}#DQm;TycbV`ebGR zev35h%&`GZ=#E`H#Sw@Xaa#c~kiUM$SuG6>L6c=8ttY0yWfhwKO&iY*?G(7SV)@nE zwe@4g!nPfmHN^>Utpp2}qHSTA2pt>ejPvJ(eV+477@_aAmhPNv7Fqe0O*tcLB<2W8 zRO~T|4_>0|;Sm<*HZd_ldBV3Vuj)5uqNHYkB{-cG*Y3*&;jm$04uPm*`V5Y+0Vt%@ zrXDqYvj9j68(3#(&Bx=Bj&9wDZo9%{&p7wJdmBu-3<8y|Y@efh`(U?iUHFVDyUTCD z8f-B>(EV}4#>ziCQ_+`W-<1!!{|YoWnK^G$wn0Y5SS5Ko=*Ur*V5{-(GafuZJ@_7H zFFWf594>HU%m-xo6?kcTK-UBx9C%h3d@`pTGLJ<57dkTFBv#;PfLvf49?Q z4OaZLZ&C&)x=kY2q5L5CVk(!3Hh{{bEM9Lm;uRaQCxgJJMKo((Uj4?EnAMSTVaVhf4(6MO%0;D7+nghc$oO*&wD-QZXvxSCWGS2G2yu8C{Gv_f7TkvB6krCKQ zu$hUf46-l@$xsZzvYeGL2k@F_GnTLzq^w9F*OQQ>Oq($FJxzGR6Ba>C{Byn(`6al- zX$DXQ2mU zq2JhIHsMMN>rmHFPIG6x#7}uY%0dJ0j)LYOO`8JS`v{#_VLMCAmZ)_i0l~-TLKMFT zvH(?2MAd??ma5DGcOBEI{~2DNe-pq)3fOece1Muf6MLqB#B zWdhB|!vupkU5q;@Pn4=foOW?04X|YdZ;ir-;h9R~`GU$y0pQ+`ph6y56JxFonoV6A%TeJ<7>E%G&{8Jt^ltg8QhTe{Kds>U6qvVUqIh1DP4HQ187 z7?Y1>0j%{}StH;AhQY00D{;Zm@Bf4NwO@%)`iC1uKFW$&upP|PZc*R%V+{UP3M2UU z`HpQL4*lvGIAvZFV*mS9UCSgzBSZNW7A1oJ{bkeQHMf0mgSpV?Q$6y}qj4%liPBm< z)zhnf#w2w+{CT6x{>x61S*pjkaBxV;ItTy>AMg6UFO!btp9}*y9Te|85GgTsWPl~b z=Jxf6%?0gWTT&fWBR0p|v^b67IxGHoEYaZ^&$3QG&T6ReqKBi=hfu+``hu~u<=}sb zU3zSN-X9Kuck{|YxT~D?-^TyD?|-1A7_N5Pr?Y*A{7rat^o4$=hVSUdRHY6Nv6{7bZAuv%QoTsXR*|li8;$}4sL+2`s)PdEo zH=-}Sw%aN{=ch<$VI1Y^`Hwl5z}Pw8f^**Hs3^y+Telbnw9)ej0gLf1%QfG&b?aY3 zmSHfQEUe0|sdzZn2R{{Tt{CIa=^c~+v}B0c!~udHq3dDtDd~t)QBuS3C*BBNu){nR zNr2DEvv=gn=KesLh(x?WZ?)9HeqS~5KbWG(!c)2d7-5nPH!xO|UyCS8jBUG}9Vg_! zu91J@xwgql))j_Exq3Ku&SSfhk#qUarNX|Tpdk~1@DX7E@#v6`B2LB)%lY8#TeoN? 
z0;!HzgboEhjedD98f@dH_I55kKC$4KjCJDT zPc^5G=kT@sqCoc_UvmOUKG1nr+QV^{$G)RU{(gVn3MEE`2>crTHvBXGN=*3P0kQ8_ z%+FtrnD8^>p8Ob+TCWY7+SuE{zozBw5&5hUEM=DJc-w7H$@q=wYw2k$f^p!#tH9-p z^{+`Yf(K1flOw^gz1zER)5x_k6#IL_s7VivwF{5>ZF%1OJj_gIdFDEft>T#fS;aF< zdOvW8Trd8V?ycYJxb*4nu?Zb{nT`(J>D@&b@Yt2!`u>C`=N=kKsR(%cWa_mC zV5>UO$pM5r7_bVPN=4`#PqPdJ*M4C(1nhl1#rhB8snNJ}GJdvs6GRlnwNXR7tU!z6+6 ztL1l$K;woyf84yV9yBHi+t%-&AtYq?Q70%qrbftBB=SsQZ*@Rsy8nF}n=yXDwCQoX z{~+@+F=#PZnA+OfiL#qu0YV%P00o8B)zi^ps6lrQPQA%#8(uwSfjFJVF@c^v)!BS6 z1>5^>j6Dd!p??={jQ9iB*2#UJpJX%d5O(d?0a6WtHo(}u`-C9o^aC8m_aGGzL(4Wl zbX#B>b7#xBqTE>GhM0}7UJL7eqr8I*q&w2RKfRiKEbZIcMFtxr{r|(aA@0q8IXA9P z|CE0*YWfL=K%hzROT7qgNvrWrP{b&91=OyiV?;utQNwwf=G_35&R=yAm)VmNx9U;u z^=BTA$u~P1CnZh)IG0vd5C;gGBmaR~g;KDa@tSKKyMaPbcv6Y$4?=;_&XBDI8Ng0T z0Vp{==Q1k`%LrTS*)wk9FHr~+8U+6MOMpa#cUOj9Xl!oQzw$fEWLv@+oe7uZei$6H zpP!Xxoch8zCO>?7SANmMviXanxl09L4*Bn|HFm2kf)$pr{F_{8G!;cgA?k>bYAH!c z$x<7#5GE4_YC}*h9z~I?Hz)$#JjJ^~f0LwZgMF*p>3ngSdq$b9)dEJ)6X%s%_K z@#8!_J^$IPtud`A;i$>L`yVzr_rAQ`(`Z!FK6ZcSO!&8a;35_taVa$P$SM_r&q1&I zRmg0GLGR@x6_jkCBdlaB(G^LhMg%?0+qZ$;wxJ=h@}p?_<2a)RrP1wPBZE>O>7C+B zZyPm4vq4mYtexnF;3I`OTm(#I3BqCK7TRr`qq57sssj*vDBLAOjqspgw?S2~w z6A3A!+{o;y{rZ$3|vkI@0XCB_E!ygRSk~vs7loAuliZ zTJOcSo_HWI`R(gbqC8Iv4qSzV@4PV5{PT6{s1)xTYsbMMB1^s=k^KBMG&aT%Bd zmstPbSB58>MO_2S;zgNC8KOlJUFK8A7lcHNyvH0wgj(~qNWvWdZ^~$dQq~UHM*Y_+ zIrmP8C>abe~tbNm0imGkaK3j8aplPf>kH63mGvS*BsAL9!hK=Ac!GHw6! 
zK*hZNl%+KY-@BWYN>8!`z>*)khw+V|EUV2MaYh`aqG2~H5!`Zm_N~P~L(EM~ihu5E z)2$pm=y`+!@ixLojR?nfJ)F=w@IW+r#2++uzvE_JZuqBI<;INsX$Xj?Ldn@|Y&vj; z{&W2LR`-17bGotewS#b5y@(Tk#8|Bn_6^h5YvILLRJD8mRCyYPQ%e{VecG%$IFT#dkbJ!G)v^uyIy5)LK^n;yL*_GP>@dU$K|t{$$7((< z|A)0UygU({+$gY4OKnJX3`T*)Hb#^cmzjZkIES43_y&kbKp|o30>^GT_370fE8G7Ox#TKK@!}o}M(;IRtT$ zm9NgW+#n0{pMPE+Oyd)1;hORE0bBxp>;C@465Y6sGf|ZkAhR4Hb?!sIlo7N+4K%6j zX&3_}VPt$Fvf^EB;A#!SL}F?0Czdu`m@d2jsq%t?v25@YbN>4iY08E}rVw$Qh_+M3 zM2XZ5T?fE4D}qk%7NADLc__#Td;9S`OYsyMH7o-qIjv;yQ`H@50EKqG_An4!i8ija;K23cGG^z;vq*VP zP?wR<9+|Tam)3g+Y8kb5&dQTkxz5XL&wI}2_3|m91W-{g>*V zxJEy}=(<8+Sc%(UYwR3VA%HXVRq0^#QzUuAaJhWe^2GFZ_M)h84*a(BWG+r;UW*BBp@%u|<>m;BQzY}?|c)?*zZD<#Q-@4FDF#$x(ZMswU# zdJ*PpAV%DFz*eery|b)_P(&ncFAC0VIcvfYd+pa-pzBlbl1>J`aA=MvjHPx z@`oze4gDqCa0cXQaO^OG>-o=5*G{h2;bguPH93Duf292MOpfKp9o-f2?^fQf+ASr; zikF}dtaHIAm?fNtUh#@E6yU+3p-Ru;?UewhzdFyZoYU7)FDGFX0#ljGLOkR0JLhk# zOH6K^B4q0)Q+uO*+_d#NlWd=1rUDcVnVdS;KCvtJlS@`cKO~OSx2V5z&w$@bp zHL%MM&P(?d&vX-fx!m<*2euyo!=75zOn4Y;4SuThfYp4N`mL zpF43UrS>)Tj(*he8ME%N=6l@K=evT33}2NW6XGtt2B#$5?u`_jZnUwVc!0NI2yY}` zSDJ<4=XInNizqIf%S6p*0)L?w&K19hKb06YprvtGoJih-Y*) z>+s{{PW*kjE1xhD;}Z~YD|!!};ue+3%;w0`o*QRf7^s&|;?Z4wcwR&Obq%}?)hFEw z%bA-6f3FynBb;YKz2icD%`e<-xZC1~$G0Ur@Qt@*ha3eeU;O<(XK%_UC0P>6wWlv% z%J$4S|Ki46gBuT)3NcX*u8+rgU(S3X-@#wD{)M`e{9Iadjnnyfoz4Eel0}i{&LyJv z>j35UY+=)b_6$B=CR{0_Z$-}8rIqsB-zLBM`YCJlHawW@Kc4SAK440<`u>X^+ITs} zSr~JUn-wW$NvQkV!n8#6&@1uHn6dl7e8!nuabs#l)6F*lRjXv&43rzDp5di^Bt>0p zT+#N}7@wp*-DdYVa{+pxTGEHma}L^@t_I_?O8L^o#7&##D?V?JIHs2ue|GdDn?8H? znJMbUgPnxF6X@kxAgUj_x>E6<_W3^Js(~p^tK4vTr5Q#;y3WqGa2me_+L~;UHtTVPTcrlO0===QJj&&>P85{NCMNe%odpHl?qJ*DqaWu`RUL z7YIdL(Yr(6?;ZS{YJ(5qBmV8(;cjo>r9JSDm?JK(cE90J7LJu0IDx$((}j~{G)TB! 
zRp)kII=BWcPX(H$w=h*o-&LlDb9)Qeb$3ArR_l*m?v_FZQkWhm0=sUn&A5Zs;tJ%= z?HR3?<~40;J9Kj^pL4kgy*o=IO!VL2AblP>j&YJ)>3FjFP5*RTH?4I2oGXH3*S9sz z|0n?2c1YR)CY%AT5C=5c4kzGGOgkdZpHB)D@mPEC;6b+5ROpug-fzxuX}OCizXe-w zsbZ{Mj=ttYXJ;}pj^-^`@Zy%1y?r8xMKQmAd126|g|qsc;?B?^#7wsb2s^!kB!+L+ ztgdPSCxvDs z7fk6zZkbH?y3-L*_R%Tn~T0c2GFC_!#FfjT#*pOej9G!OXd*Hz4 zDmns~+A_>JTRJ+Dz+X`Vd|qT!`l7VdO;3h#&QTXQ({oTi+vFz3VRh6w5X0qqo7hEyAE{(#&OslLOVk6#4IStBhUw+cDSh)`4s8 zEwklL1b5Cm|H5{=0==0TA{J|jB6k;pFB@B1dlYKbS#GJQJ#VJw{?q}<vKuT++19zC190gTInz+8Sk_*Wl5gFW|sh#7`35(u|vwOLu@DhT<-&*M}V1j zNrA;jztgl{bHlQ}i&mSTq}~)s*cUkZEOaC1#HuKG+Ue_%XLi=-m_UGGgvK6Hv86c! z&BvwU6mhZlTF9&8^IER@mSmGW6#CqIP%9(L;RbLZ?2@G&2RQDYV(ORK;cK3{bYhB<3n9Z9WbEsx-AqYg=2H z1q(J{GgvT3074#_`SU*~cF4X!)zbd9($ePNLS_}j|2p^U4xD$&>QutB{;bJ4Y9=RR zDut5Z0ZM{TCx;sZEW69Q7v|X9san6D9X~#*^!=06SXim)Y-VP z0sxN@Mo@)6a52tEEZ+U@mHeK!@nIB7gJLTIfM>vtjEs!xJ3CK;DQt-6@wCU>v)dzL z7#phC7UH;bsstO~% z=-xf7Ji4oYDRgmLd+Iul`*o*rxXjgtmxf{BOTAYY2O*v$^~6+keYr(=X5#E_3-1G-Glg{^#Q25*`(`Rerd86$m>@%Vn9R zFQRKF=D2wsv)cS3FPq&kNO80zwq0@i)GZXf*whUVWYx)+zIaiNt}vB0GmuU)TQ7NX zg<`i^e;SOTr8Ad0U$M-^8Q=|N4ebPwzfG$2{B$*tZ~F8^98vbjtY+(m!G{Cp+Udx?{d>^uWcWETKeqtk{fKeqQTE+b8`aa(#3?( z@TwZy{x7xLrmXnt^Ci1`qfxT4(I%DNaD++IR#)`s5#P*URL&_2vT-+PVH7;i~7_WJdb($Y=X@(u7ddfpmrYQu5+VE2d|6~J7F z-;|ZFzPCJUX>EF#mwmfJNSFUG6<*quWL887+R^#4V{Qcv1u$I_ZpsJGXeUU0r56pxD6eom-5z{cR^@ z@8b-?dMmyWuKS2|r6B_irO;{FEH^%GoNxQymZ;*}@vR%Ty zxvWkvd9*lqAcw|R=08nX4p^tgx2=z%{D`4D1k~g${vC1gqS>b(+qP}1zGdY{gxAHp zUa5kVdgrXHSt1aFs-h=mdq9>teI4k$Y2VfWu4yvyxcbZB%|t_R#pZEy0+yV7x#g@% zT%*JJ-5zQ7s}9x-7|p{c{(4yXci1G**Q(QY1fp+=Kyid#Qd3hi0i4D}+qUkV&d`EBzQ4m?I^%2D_>eM05X-vG)LrD$CY}%ha|3 z?N)*qux&sQ6fl5+AdQUzDj-OdXg~o)lpGXV1zQYg0g{svBr7F3Dk4e{C5nOql8Ydc z^Y`wm0-d?{zt7JzK0PyyRi{p!z4uygT1c$NE*%4boFJNJ)CkaDJ77|_7=9Tq?Ki3F z-U5lr>OGgjfkZue{aPNq*a4jH4}5A`)*pZrPmNlM{4LqUDm+?~h^-<@R&oY!7pH8} z3QEz8EgxECe3|!xdj20*dKck?u)o^udYuRsA(W{CUvC95gHSNn%9W4MzSF~Q@Tfv6 z74%fB?)q$#)$q|v#6TYfY&FRohu!u7bRE@Y`=;Z7>+0E<<1DR0Fd)jyBgpPg#jbo< 
zzDp9QP!040Y&0m8&*DnSwrmva>nlK-ap3_SpH|$>nrF7KZY>U4FKMSjR-mF{k(gfL z7E~|B2VE9kOdUS-KuR+)$oHhLtE2b(YjaXFCWrM+8q9mS(sq3o5m-F)N7+6Xbt6Ip zGe6KV_!;{|A~HBkA-l%O_I+?r9krV^urOHIW#Ms91ag9UiAdN<0RXAPeWKf}zf`={ z0R?dtfNc6r$}Yp%Lq#AXFHeRIgc-;jFFw-uVj1C8G_|yRi+n_SpW4cfGOHz;nf}bTdxf3L`wYNdYybV5YwyR-AIRkv%nx;Cpn|b{$J+@K|%4j^Cpn} z`0sPhLCR7nz(Z5S78Tr?zMEIPmBSF~=`_;~Kmj4@9dzvjvpt>C%Qv^@M~ zqa6;q0T_SjDWA+M@E{Fh)SyZ8>-$Q@6VBcBnW^taBP2Hcec-@@51-#QZ{E|$Q`b0o ziI4uwc$4||68~Rn-c71s@GLdnv6*A>*JZt2cFShI9r90G=Um;g`rpIp0(@jnHWXRq zCq1NWN>godCEQ>q0tbkJ_@_{+uF5GRdm!Yid5Jg+`X2#(08w0hBjN%{_ez8KaZ@P zf+-@5LZLx~J@cLgk}2p8ttVwBD4&+K4;cP9>!qLa?yG-Wd>1Kn+(-4~|XQzvwBNFLg(9e<*QgSZLI61f#5Og{0P#5B<43 zvOnBT0t3^@w23czur(I4n#_#?E`N@;zf_HAKE2AA_k=_A?bKXmbzIi;OqM9y$#FC} zMo)Abh2n^mQ0k9W{(`Ey0x(EWGXG)20&atf>;U49ERk&|E&b@I3Fqh zBp4w~aWElmG9Kthj$yEmPZWgdy+x>L)Rd+8QGohn{X(fR|KfUbY;2&+P5aZkfXWS7 z|FACS>({s?+3?x;OF5Uc}C^5VN_V_jP64YvF^>OTM%|;yN+@+L-3&3sEBF+ZLtF{V#9~ zRl%hM|mpAubnMgw#FCXGxdZ-Q0B}ex=#deWzlOkFOzg`uRIZ=n4 z#o?rbRE;j1Odh9if1R&OJBsLnQ(<&mLIiTx%+OGi5*gn}OG`@xQpe8X8~HxmR^988 z>F1NAkhxT9FY&~z@9yWs1MnOgO|`Yf80;N#J_E2Vjdq^<#z4KF z!BUz;vA9O@yS>a>N;05g@gf+|-k~a~M8bW&&8PGJqDEKoX)58#$hU`dQ4^~>2yf%{ z<82PON;w!piaCZ8mjg{U;&G!9*Gdh)etmvvkc1}Y%R4mRKUfKXiz{k@i)5E&a-1*I{4LzeKVJKu7t916j)4E=bX(X-Cy` z4#%mv)vol;e4Mr3){#-x7n`_VM@fN-?GT2H90hs}L@E*?`K#XUvF`LDCA<_;f8T!O z**BoNiVJ7|c3umfJD3K;awU7Qbc=xb{Pia#&1PRz zZ-x6aJvZ$yua8%Wruj4Jj3pO4*R9~|K$k|)XyS-~tW37-Y3(`EebZ9i0)8}e>HU>rT$I$-KKRn!ONa)b= zeG^aM#NbyzIie})BP-isH2ZC)B{@&h4agOcUIzXPdaz^9Q9N|~3aF6AS_3Ws_Y*;6 z41Q~R`<|_o38T*F5UVY1w~dAld-E(FUc@|R47Vx?@kt_=$$yV!B#M&6g*{*D-D@4TUQD>OZgjzp3-$nhbG|3pGxyRboKYMrpo2(+;#}>3%jsDz z=PVFqRP}$bt={<>aHoZO@!OutSGT_`rHvP-s&{YqW_Ao(o)w)r(2>@_6%%N^$L!laYGPmE4ff~RoZ~&6Z7T9166y{Yz!Dnb7J$>3r;*I{C zT^lleYj<17kQNYkr4kvFL6Y{Wq@aQaC>A@m3BDZC2I|??N1(N#eVr>1(X}j7|4^V% z=9IvZ(ORdS+ao=&x!!z3b}#xRoef5Cfpk_xd7y`9=jM(^i7&W&_XlmC8$U0cFKCdx zfJ|bkm|&FfYWO3Ut(T62mqd{YLxU}WM{}pfVo@lcy`)2lsJgl%AbCgszeeKEn!4TD 
z{@u<1MN}5=j=%Q}7A~MmXVFNnMkIE8lWHC|Ce7}!45e&7a#3k_FnU8_SG|=d5x2MZ zp^^U!ThlR=S39BIzl_jKVWnR~pGgGR>i6;-Kj&L8cF@$KU>Wl?P+TZ-M(-@ZR@0h< zAiRJ)cNqH<)tTzbMLkIRvW_)<>ivtM!9j06loA{zKr%SExT25=Wst%{QT*6^@{m`s z^K;QP+@Cb3^mArj7mxFwyu2LT+#PYn&+lme#Djo3@d^q$KcYj0D?wOd7=49s@d66< z+1Cl9(tWfyqV!`a_jO#Ww=ve9iUaNfk?O>2(_Ve6S^k9QCoVu82rpa9HJtS19wCHb z0SyoX068IX@#@?EQWT>l59Yku^mbcGk=-agy*-$9bzHbQ{G}*V8663kJ>@tyf~>XE zxPN|WINqM2TD__&i!mPLyN0;26V+1{sa}w95D^tc$3Z2&XRAwjb8N*vdg`rBz{4Ecvkq3etJ4x_v;hKUH3hA}qfAS2z4bF~a0GSHAgbOq#_RT4smRJ+C8QgcWDHar zgltk+G{@_}N{_9T3mj97#067AoX&k_-Np5|i+HZP;toB(W8XQ;L`W*2$=hq&bIxWd zCE{Jiz_e*?ZIGLG;-#&Xd}$f1t9yg1!{XUXSG719JTl83D3eh=s0$zKreBIdAVuv( zir4nqA>HhYR>4M`t$GK_{ApfQD;wVT~)cf7Axuio^5Ntbr0cgf148#f4PenHQp{ea-Y(fbP$ zlQ(WX_9g=C?OD4le-G~0JM7kjz+!y8R>+v#^YxC~9Hp_^_|DasXw7s^)?G?H!NtI? z(4BK;s#uH-nA};lO|~kwzb>Wxry^q?2KXW4Ibi3h*T3PFC73eNfcegZj@)4vA;4`lP8ZZqB|0`+#rQhv|)aqIc9J5j}d3fnwUZ%TXDudNl~adiW7l=E)ZTI)X5iW(1+Nc%y0^!oE&LcGdj|a zqCx{XRpj={9Xx1IR{c%%2cBQN;Fn+Y)!sK?X$u}tiCeN_j==e;%a{C=+9KB&Pv7o| zFzgljf_yQ;s%Iw1zTn5~9*5BupF!--LOXV-k?!d7#c&?CfUSoQemeqb+DK7wZ= z3Aqw5G63;CY^y|N;Pu&s`b=oFr-ZZnwd6TpwuBFVRn095VCZ-9LdY7e{!zxKiNi!w zNdme-XM?9ZOtr##CoMaO3IZFVx;(73C{MBc`}V=NPvzXSpBe07i?_>U)|L|nbsOd= zo%vBm+TgEEPqgg=XCjI&>H$GpWDyPS(q*gY3IKp_^@BM^+cM2AeF z20T90GYJj}rN)*Zc{Qx|Zqi#HJn~#Lb3^FEp2T-=nNLna_cq4jwRv)WeF(YPE_P#e^6C;~dL;Z~x(w904OwcF7wjdZL3q3s~R)rpfQlQFAt z!TkBY>lV7motMO1fZ%#_-+J|gbRcWYdeSAgsU>a+c!*f2>l~ofCHQ5%RmBmX`pBo~ zm2{pM{$pGg+#Pq`XoV2RtVBQ^XQz333n!!&iiht?DJSy36)Ta=o;P^Tn}@k8TWm!Y zN#?sevuKpH?kJZdI5&X8({0FgSKC!*agFc_vI!JrqvToCC)V^`=s!STppnRX5xFT| zRn{iSXvoUD$Nd`{vr%v{YXp{rf<_&rQ7IY*4#8t-P4?K#)N!(sqgVUs4?ze6$J3@A z2-tN&Tw*b0$=(N-4}=8in)_CXtc;bupUM=$RXG+)j)oP%TJ0^eKB z6y8L`R{nqkn%$d30xG7e7n2Qc5gIrmO8~3jY_AV@GEhZe4!R}oa&wgH7%*PRF-%;l zJ(p74iWdTSUV(ZyA+6e&wpm1^*S&B~Ls_-n#1XTy?j0pPH%;RMivDKq2Qo)d%aKG> zqHW{m=GNS@7Go!MfHb~C0&Hj8`Nk8$6SK7d4J~oPJK4FE-4$|59^`K*FJUiD#9>0Gl}T3s&J$E1YDW-%!hkEJCcfqSBg#J2 z`Z^z-sisu1>c4U%x7AWjiKpVwwM?Oo_TSb2jgEX;!6}qPwG83zGI1P;Uej864)PH) 
zPXdK@an~yT9#<~?w5F%W&C}(|r+fa;_63>(Ra7?Li8&9xTI?RsY0_<>dt}}j`H&?B z*g02|5Y&>l84$>CzlGwcQXzO1Ij$gEsO`4FSxmrpHK2My>FZ{JEdlLd4R%q>MKXT)`=lQwfusw6Q__RweWCxrv-42M674%>pl>0-k`GZ zO#1|x(P~LAzf$$VZ9s1AaEn7;6PRJuajBpm+uBaV6y?JfYcz(5Q&l+Z%|4@sbZ~vU zoJaC8Iwi2DE&hq8-B9Gvc=PT|?i#<|8XmW+L}x@pGdpXs`SfSE(UIw&C@MmN{7h2? zX~@IKt4^X9z`3Af(;d`ZyePthRl+-qZ?ex93-Y-nsgO9}Agi!w)3Gqq)7eQ4`{gvE z8!ER1r`+)AIvT1r9) z!7Bf!3Nfww4?gWEhIUzHEqO|Lw>IN!cE}7pNt#h3XCFFQSxl z|5bp+>&aZyio}&a?;cXA7mdpu#Wf6zu$H% z1as_V0JBlKxDP1n>eg{24nxDBNCo)+{6x#5B}>%c_QTseLY>G?U+uzi7;2`spJ+>I zTqnniYHTh$-y~Ido)d5;Ou>z-lw7~UnI$@KX?k)PI_;~UdXOMwiV*WoFn9!ba&~qG z;_|HF1NxfBmESbue9yFx~+HLO1e{T4cev z?tlap_KpBF84EI}AmbIHOR(2Pqec|mxwG=kqB+ApW@Qy-H!ay{ zphwwm^%yZvzPGL5hyXW|^!aj^IrB(QT7R){9TQks+$;GE2@gOO?y$O8m9-Cu+}vUu zX7hT;Irc+ul4G7TYJ3^gyJ=lo-|5`t2x|z#lSl-IQb>N^$Zs8d$ir;p)0Zims(wZv zfXWErO1ny6T8{@et3@4nxl-BduzX(W46tjEn-6?;uPpD#$!kT*fH2k(n;Y8weTC%F1eFK0Wg9ww}U}^8JT~GHR#_x z9r8YkXe|bdUOv8X>JXD|7FdL539+<#00h1B8&$y59)rby!NP?S^zRmytkZ-2Mg%Eh zzqT*(m}0OxE;KaD2Z5UAnsWa|1uuj6(B_B7_|HJLmGr3_*~q+)0ox}@AI4&e-EA%d#0 zGKZF_5@O&LO7ra=j69Rl3CgJIxMV*!ra05_ZN}ay0%0egb9;c4x$vN7w zAfCW@1Y~?nIUjJz(lXIb2?-*&)nt-Xx+pir)^fwX;}_NjUGVkpU-aJ7JJX-h3Cbv- zYzjk0ji6fOZJO}H<=BuIWU7`2$E~@-pYrGJdzo6qLs3~lMk7La(a9(@_8>szn1e$` zPxi1r*T#7b=2MoY=}N7EVkK?|zeK34FUxf#HLFW8C^=DtN8k=CA{yjv249`8Hi@Fv*4m7Az!@aQBE`g7N3yZuyhEM|kjB`B!$SUWE{n@L;!#$=9f zOGAO0|3$&iqM3WrBB8BfnmAlmv|$MXWK|^9>-p6BNB|qeUQshJA#cYM%WOvS93;mG z?`d?O(ZB@@i;AjX9xqV|6Mgqt96{7@l)0v0q^RKp)gy`80GUc|FV5p5U=ROW_28}r zJJu7n;L5}@qP2e_ie-iKm|sCrUj7|UHDNHy>Tz0N$9=wBe{M|cz{&ph<7(kkEW%pL zqmWU4ZebW5@&vJ@o*}5K8(JDj9V-A#ol5;gNIh}^XvRe){-mtjs!$)Zwe3#s^0g}4 zl*Hx+jrabG_omd{!l}qOd4sS^Ly>z%kH`E4S_94QIg}?n9t_G;E1VpUE*zj@)vdW?1YVBkM8!Hhod8&fF%0 z1qMCp?q0zs-bk6N)t@l<%zWJt?i_P(7Rg)F$04_ z(XbuuXLpJH*39C1Bm9`PhE^$v_y_{70h}E}9+DNpN@%=uELJ-1@aPO^9(2?+~ z1D&J-0+3j8cX6I;E8m58pO+kDqa75i4-=emmo%6b2=ig|)e%5)W01kAV`LanON$3| z`&Wam#?HlIYo_&+G|G7&-4-VgK|c|m90!h&C~?M6HxO+>2gJOZ1X4zC-5KGdzgI`?Ko@0MEXDq}rf{}?&>t<2Lv^p@sDF2Kn9A@<7J 
zLDQvyOF4^l_0@Z=9s}Qa1l$VUcQmkv)e<&w*vy>^lA$vtcbRa!WM}XGsE(d6xAO{i zd^My5A^18BDZa#@0mGohP_ZD#4@6M&0hRiYv_+`5HE`b$X|xK5iZV8Y3W!E7(7w(Q z_-jTK>1wy|hbYo8^2C=7WR^$dq;)>#7!1IJafoW3(Xp}Kx<-#wGsI~H5ui34>)yoD z14-waYQnUk7fiy8c}S2q5p@VkF6@MU_!b|b-q%a1f%Cx{Ed6WqRAR2efZs|i6-Yc~ z2yqTGzE#1Yl=(k>Rl>Syp^#CU;T3e!FGYYGLxFb;Gbg3K<;}~0l!asE%9A#SDg0fx zDq^h#gx-icCSB5$8*^hG4laI@-^0$szfBYb1Mt(eM+TS+`i7w0fheT{O%*ZOVOY5K zfR@KBOgE`N}Wqarp20IXXCc;!`H7jx`1dNE%Cqi#F{$D6jio_&&4(jeq3 zMwNs5;XODMuY31Gq51UOK3Apa@a_y5?V){F)}Qe_BJyg;JGGS8c>xhY<}KPXTpE{kHikhse;HpuVM}*p$ z=QoCGiW)5-V)EA7V1$|J)NqhE#V~VFp;Z@9aDDJQ=>p_YeiD@|s&|B^J;XD#SxRa$ zhE7^43}@iy%ZKpDO{Pu)!*OW(%r4sG(G|QQWZ9dP~1iKiLRAk zxWHbL1|y^I)tW<+^C0<$!@c<5#iL(tjI>;3nvS-#B67h2V@Hz_YNs$g&U<|yK~e_mEX$&Cbwt=mmi_@O z{dNltDlk*XCKuGG4F4OzL~8PabTv1GA)(J7>8!LQT^|g4%XI)}-2yNPl1Q@A8XwFk z`Kcie7y8P6sJ!Errqq?jQ~uSG6H$e0vc`SOh3tj&Gz~P_gEMCCFU=&KIC!M>!-#`t z!qN5jFSiN`hnoibuJ;yDvm{>N0V5jBh{(tyeE+te?W2es2;1Z#Dq(ZLD0JY6t0bp8 zm^pIL!u)jQ=IOZ&I?UQkEtoggu$5tD+B9QjqVzOnEMC;pgivPUxdV3!NH$2bXc_N1!%}Q6H4~0s!j?V3Wj$?s#Ke|-rlnIAH%2OW)ch11nUXn9OVlUU zy`S|^+^&ZC0m+(=!LV^Sn8QIgHCpCu3VlE)lHagbM&;$1Ij;SKGCY)Dd;VkK#XM2R zC$f(9ziGW^_ONw#d}`+;XyPvd|041&vTb5h&*};UJghLC>3zy*`;Hw%?v2cJPOl|! 
zHFI!s+EiKlw4yS|?x|_)_^}r_ccYGl|FCcXkw4@rG`C>$!kQ)=98rHjBw2+Fm*yAY z6h*}YQ*uWn$6!x-zf0FcW2Oe|dHra~B6y)R(V+~0!HR(ebBxG}+q)ZZ=JKL{C;DUD zP@i>Q(0A}f1}>?Rfly(tzg3HIpzVNPneE17cd9z@2#Vu_wQ1C0TKd=2#aYJK5F z>3kPhFHAG^4?&>gl0|Dakdx(SF6n2^1r ztr6mCB5VgED)RgkMb`*gQ2(H;x1wfiG&ECPtC$=&#TJ}2s^c2u#|Qbf_@n2g=ZyD2 z$37VjkeIk!fVE_H`s02mP>G;n-oh!nlzNC`1~8CGKvGf{O|4j*{SRl%>Zz+=!B4E- zb^^OD5>nneX!8`}Xd&f0el+g;`DuUi&42hS8=JbZvGF3lxebB<$X@Z6+}FE%)i1a7 zAWYf918*b9Z!7MCfttM#l`lE|4^bib7{peU{jQ%pps36hRMD-I{@V;2EpUdXJ!xN; z!8URhB?ok2GsYM?CPD0kbd*ri*yTeY=SM>3o_Lr@QTh>5iU|C_M`UbekUTe56iN(c zkP9$;fPnaQkk`DXL&O*U`0?Wv^7Hf5TO-a@1~0kkR=fXar<27Q9&LHd=sWj(`f{|d ziZ7D*BSP=}p+Gu8%#(>jO1uo@qCPlk$oB`W>0>asH%p zDFAm74S~5CFl%D%oSN))hB>!8sUO+jgLsMvVn(cIq%j8rl`bvnLQfI<8k!@~f6J&k z{oKm>)jBLHuuM9~b; zWemp~0X)xRhB&bt0gojcI{u{zbE)i*5O+D@>-S7&Ax(+f7);VbfiEh|0^u7wxaA)` z9x94m1WG2z1JNlXmXuL6MWgUU?$Lcb>PM7*ZEy%I+7s)cEDSsOe2w=~Gya`CPZe^p zO|zPqI&HA>=DCwLw38Hzq(uk01d4;zz9k5u05amvkGO;;VOV%-cj*ZICbvJfbF?nzyH9HcG=8);N;l2_L3FlP3x!T%l< z&ud}s)^?Hhxse9_>6eX)Pzohfqp%D-DIVm24mGwcS1QqcFccHKwc zz5t^k8CRPa4y~DY;6_}MqFquNdVh1hSD@otQAg=WpCY?)CW+}1_2{0<3Z?`g zZ*2A?3{9JIY)s49J4rLD-aBxhp6ByB<~@jWK{yAie=?|YQXr7+f(3UhnYSNJHEarYKcVlljqd0CT^l> ze3hvk?DJrKx>(2?C<5Mb8Z3vu4>9M!vbnYTBh<`q-k4C>HBEc$vmKqD8gbIsM?dF~ zVA9Pl-YW7Giq~F^EsN7k0d}|?vgxUdMitI!awos>Ua9e;dz7^MZwk5Sx>Iub=T0F< z!f}+<$1(q^pvKmjzX}0aG|=sUb@TG9KU|r5!Wme5&KI&4t$0~F>+Pz@+b!R@$$tOy z>7f4)>SuCGa$&=nt=deJgUcB+ba9bNL8dmhPrNB9fhMN^8SHlFAr1!^r=p~ESN{r< z$dF{Mspr`BVH+g(2Z-bV?fO#=!eheBrSRm>#fvBn&$pog~okP)c%; zbu|sk$io0W?J%;RALHZ~G%)y*tNZ`MC%K>yKH>p1;qp}JiEsM>%yX^9oONH+1&;v- znE_@|aODDw*qB#Co*pw|l{j}O2<-kKCtk{pg)=m~WG$Q(jM6Qw>*-R! zb}EnpTMu<-e(M4)(K9j5T)z&aEdKU zFlX+y8V5Gap-A52C#N5aqol|j1;u?NXm3^5Dq$E;rls?sN;C;86e+ds{%Ead=0f#4 zUsgpw*v4(#`1vn~jTtPVlU9&NEe-^^w)YIk@fIrqNjHE*AgEedHBKj3nFQavXQ5{{ zrT9PWga*Y+8FoT}Drx28j~>;YSxO5t>h|`rIl4LHtZIzV*U7678~*FhH?QGCSFYq_ zW#5{%+eM!2eT(3l054LG{3aSJv@! 
z6pNnL9lPwE^ISBRZXg$QZH*rj{Wp}kd<1G$T z0)HXDi4qaa(9_qE{mk$@{Nq`HnS1`I^e`2|q%Dq!K5fm+-P1kF=&gblX4~dKO#H7~tYcxX9p5{nX`Sc?7I^7!D5N zId#Ca+$F8>OOGKJCV>bV2P@S8U~JtD9x;lvm<%3LV8;$x!w0lwa7NlhgNPyGWxD>a zifqht)=R|kc}Y!c{QYg0Pt8O!gzjuyw#X$Q3AfJaw@&>Z-zn77=Bd+z3Yw%1L<>bw z?HU5d5+61h*SoSpFMz}w!5T;nNArxW3HTjJFQ6h9l)p4&-G<5Y7iQ8(Wjlf6^dQG{oxw!$(h4x#CKMKs;ygsGP)h zk5$I|1wSXm%sI-Gm#YXK#qGVvxV`VFw7r)v^l|=jXi{?`wxKv%@qW2lpjXk3p>GP0 z9qt5pRb38=6B;o!xEp@GL?}dmOK|E!4`u8zH3a%1_5s3s0VYxctG-JsIB_p2s)Z-mSb9Yrk765}-4KY4}OEkrFW%oOGsxY)*r)i}?{>in%i+=r;L~#tD z{lP0~6NMf`hw$tFkqx57{ch_SzTE~o`jMZdQa*^@>Jy3DFjINRn86Bvo=ffRBeLyJ+$8(u#|SUk*#WD*GA^KE>N1be{RFZ z*}ASl2WPbs&gvZ!zZi1c-*rMf!Aai*4x=W29~NQyQw0XeU-`+>;$oj8JxLofkTmj@ z(od%ej<^P_B=QKEXw;mdEDj3)nLs^l*%`LvR$+7$Ux&8!)*U5frl5Tl6JgrEG< zaaHP*s3Q#eqaU-F;28zyYy}v8R>BlTrcpfbQ{wuv9?=qq2WDZB&&8NT3uPLaZh7-H z1es+S-q#}ioMQrPE@r;24huW|1ShtTsz1*OFUUpHnJH1hs~%C-iFnz?p5NssSkKY8 zhP4Nd%){#u(CKzpJ$|tl-mN>pH5WGDkv$=!~(^O%73l> zf*m5Y11HYyv#%ZTUQceuaGvrems?uN0u48r-rj}|NY1Ov+NR8W>yPK$F&E4)dq;k; zZXw=YPI8h}S)fLZ%0v+0%m(Z5@DdmEi^=0aajE;#lgTZUzpC{-S9$7jc-_wYb9PqB z3mg~vGQJo}0*ds8KYxlb%3DnIb|RkDt5|xwJIp zeeA=?ziU{HZHD<%SGnsAeelbzLN;2VkhLm*ZW6QPb1}n%&HNRM{cqCS1{(X;FQu)X z!!aP^I>h>whhekV8>VoOt20ZgSac1f4msi%n3pT_p0omtpSqGr%iN9|v`=g_p5J%H zYRey)`IR4+BF_}|?kJJXg0R4rJWTOH+sBaiQ zE&;nCxt-pVUMCu>x$yu(Bbcgd3Og1I^H=iNh?)MPRZh}6#uK}9Z%Rnk`H9u2B%pR` z+{8WeYjKKS9-&!hWTt7<&t#_M4bFaQ-!9oJDW4AqGL;dqr7{~JO2Uu#Iu#PEo1m6% z;#O<9&31F;rgsdV9;IPE)+O_2<>ZwOzH22lz9e-@xlL&x_#-sIhBS zvX&+o+t?$eY$y5FrfZ}G*L-?dl@rv}L%??pl#Q#~%9fe2D4 z_J&YBOMGM@JbV8ZecsoXtxBRA%L)gM&O|lB+>CY4{1Hx>iC%1a(YaYTE0{@4_kFngx6pd~h0cI?O^tT>59fY~A)x&4XiuFSg}(-7g9{R=e*7bDvb%{^Mn+ zKc^thD@tWSs9tMlul82fWfJxSG1<|lms{v9Cz%zUz@FKU%=IDgqRb)H)YZoIfBHw; zzh+}SyHuP^%+DiW^uJ~pQusY)z5taM#)10r`PxxzH0NVIk(Y8d%)H3#8WH$wQ4_oGFZnmL`$GnhQj)H?>HK zN1lSHUa^v~gUQDxL$#Poz=b}$1Qa3Lx%*ji?-$Wq-xgIPEs^znry*04`9`7hC)%1X z1-;&5=F4^~=TMmRX(k#;NtpS_UV_lAjy2s9AqM&V&;;HBGmliOfg+(q5QPXMAW%Pw 
zA)&o{b&wAS`c)IzVuPb?A&_{oXJBwM82)be=`DI0&#vq9m)NwQph_#_5jJ0z5Z&{8 zDeI8h_-z}CVe;c5gn%a*WO@md`$N)9XyxyHeedD3iUWGy4jy0(FP@AXc+wz~&OHkX z3cW9w!j9(9Ks*uL>r?%CbpiQ}@5~Oi36|YA5beM3bD&_z8(AIO7)y5c4yeRj2O|Lg zG|=>PVgLdE*$&}CMyEgV24jq{1DpUH931MR(~v5Xs72!OMOgcLUJpst_Ay8>h^OzZ z6Su`l5&-b>nrj1?JA-sGY^C_bA+rd)9_{sAm`WmWL)5}Gko_Xt> z?3!lCjGvO$DE$^Um|wLE9nl{f6jTe`HFMTYqjMuNPn6ZsF#17H=L16qqzb~{&03FM z_9RRMmxks=BUV%@rIqwi{kZlpM3gC=_D_axp1qYkkTwg&GI2kac zzuVMT>bGj0Io!)4PNfY`3-=Ti^>XcA-^<0m;HN*XQCS9{PzcT1HOO;Z{zHa!>;n;l z1n3;iK>d&~0=_p8DqGj16Cvp^9RQ2G!4M#hOXCh|cPP#>n*34PlPBe4+Qmn}Dvn`7 zv(#v_Ck(qTY6dIGho+2}{db$|s%GhgbCIry~io#-p6f;TjFZoZ$a)jfHY zS}?vq?)guD8hd(u=;;zyo1eR02OLGBOlMXq(WFaAjJy9w+9=?EqFtsw!+$9#C@{f< zE%#fu)a>l+p8F%gn2me;->``IBODdMp1cZ*ii)a8LfMJ&%HUTg{hdu1AD5Ew1v2Hs z_0!PL97SXUkt8D-lznXBvYLlP{)o>9Pi04)i$(NS?p(8@ot*h`hiW%w@IOmoop6`2 zP(#y#k<~+zDu}h9z}>lC+!^Ch_!Z!k6o$%UT1Z@S=)+0HLx(D`AeWl`pCXXHON>i% zSvUehiR7mQVT>RlfDodf+hrCc<)}eLliwU?XYo{*VS5%Uxtcw9r(|Rl>yolbCaM5| zJjs_wtxPn?Exv2u56RzaQG=vYZ#ZhKV7r7@fcQ=ExCl!uW4vWt3uH#*5h9UXpv~mU zB>N9PP4o^7jDvVlkqR^qOtM9?h^Gc!MS}`>(p%X5JKM}*L_ zU|#+X_k9g;*lY$v3XiusG^}trS;5o@SGR$+9t=~&r3rEz&cbgZmY}B+-;Kg0j_^M* z&_Rd`-lkrA*noszUKrnle?0SIhuJWVBZupjM{fZ=fB4~y*r3tz{lC2hOYly^|q-VHu2*n_0fSkIpm?1SLCN6FZHdN(l$O06xi=gDYdZ($l~r z#bOdy3%~a@joz=xUzxK{_a^Jse#<`la`;4FU$))l5&fM_j!#EM^s}Uczh%F!|2?4# zkFVU`ebUl7uN~{)9wf(?d)Bg^?uSwb6F0rxQBpKEpB^o!$2zWiNEmbia<#T}I6D_u z@T2~rh678_M>6Ypt{J&Lf+MOaB~;!FJeGe>%WApn=zJ#BNvYF}7T|mN>XjlD8;C}G zAD+vIh=}NOwMh$Fu3|h4fHXbp`iRl#>^r>7SBh&R2{H&sLTnJpB0znq}OfW%kuSJ8$dY)Yn<{{7N zZ-}XgfYPXR|8l8t+ytCYDmY$+e4)$J!89gh0IipE`BUd8$zga*?n0gZFt1*PG6nw}D4dkl^_jAv#Rx71fX# z2JsGwF3jh_;1D7-B3Va}mJ$O6Fknqsx*sD)pSXDYV*f|wl5Fvo+Do*$BD?>UWTjA%+!+HqR)mrcL?H`m7#C>et_2KBr zSGSL%nps>Q^?%Cqn&B83t8WB+iWvCvA&nbAkK1>i;eVB&224Dm1T%A zhvGAfN=&>->A&&pTZwUg*W08!RRD%ryYP$Jo7#?#U^FK znWmIj)vFz~l(tFG_NaZX=>PZ?`Mutj8tX%}Ons{s#l<-mr82F!6g%;m&*KuFdRMp- zosiKG@TSoZGw@mPotd6ZF*~GU*hKE5Fq3Cz*DYms-iC8#8Zwob)3ufZ4o#BzTk0eT 
z!_H!Tp8PWNE&^4N!D9-Am)@>2Mo5Kbn({r?=UqM1qlQ*0zE-TRR1;#Oz51)FlBR3P z`Y_fr4Vz3fv=9Sh@9m4Mn;9erTu*O`UnQ23Tif8!=gCOXLLU)v`i!ksMzdJ^`meK&?H}|%fG5hx?;!q(mUxdPczHl_(-`UvH$mXCOZ}PItH;h-w8@#xC zEkp;#g)-Z~$mKg{TrD+5aHD1Il@&9cu$FExuK|+oRvDkh!&v6a%8GG++h`%c%ex-} zkP1*mULheDZ1{9>D8}B6}ACuhEAck_5uZ{6tzB1R>rFggBN% zOJUgi0`o(MwnLq>r07>{Xp0vux(_8x@rB&wZEs3Tz4=DR#xl++XVxd+=z&UiGs3vY zOG9#X$Vz$U%YPDgvoNQEWV$fc#K)3RGd{dy=x*p0g%ku0a6e`dsdzG%wYC{`j>1EwDNi>P2H2oHx#|jqKOz^d!!D6>p&}1(4JoI;O zObizwwVgUOJn{>ia1R}4_5hcbQ=cpc9h!9Wx0Gbw4!+sLTvwqO+VjuT)Lph?F9yf^ z^|)u25JhfhccrpoiPE+b{B8myzASL193xPecWev)He`4ykqob2)YXwX!vZuSR?gm2 zHvj`5T{jor*A(ri5}~xG&m99-@+7(ljry`a7{%g&w@K!+sk$rqT{+*+Tb=Y1WWJf? zp`82#>$a)b1)Bq`wv68v{yN37sb4wycOH~X@GMU zxM_P`cs!+_ev6sKww&7ggnlc$sxzleZ1U@H*|Y8EQ^6U8fv|lx-LxqF@TYj^i7CH~ zUi%{keDM$Jjdn>~`iEo(!x8O?ZyaGhki`*Z1<*1wj{?^9Zbv^)*gVYniAMV21aZeHY=>q0L(H0X#$PjXrh(T zu0JE;GnqXZhhV}zhGlf5l;mdPnu$Wgm8Yzzs2u(MGn;N5B7cC=q?$<50t44xM&N@f zT!|>fPiWC#fF_+VW7GsZl5qXClWYS;MRy|Thvd-|rKM}nnT`Yc}4M)UbwYTg^gKdqn|_UD;c%*jZX}p&s8V~xd~zL zjjN*&;FT=0Vnc(DM7l(fq-emO7zQK#5iU?-^?nE3!dQUc&fdNQVNlg18wx7=tME;Kr_s&{<@25JB_QpXql5$HvE#dO*{UFb9A{%+r!rZPmktb8=)^i!#CYR$BQE~>j5_DkdQA1 z(_9;(0miwzyHojsc3*7ruTER+agO;dyA&!G7gtIYa&jBLztXZ30cy-es$1OiL}+WT z?9*!V-KhqfE~zG(O@G}JJ+{sIhB;e2zV}Ay)6LfFJRHtKmWt%F$DlI>H*Zz|bF2^Lla8nTdBvGZnAy33e zBnz2LU_$iun@^vPQ!8OVAWl%2T8M-YJi4w$5c1j@V-kTewFBQghW#IT#)fu*`BmW& z5gpLoIFEiT1s%xzusrnauZO%IgqZl=Aw*B2+4!M9@o(}fB3ks`ozK+YE|&kA*MQT$ zG{)^D=sMjlDFin}z@>noSS8qB&^kWEZeUX-1}y~%v?sQI3^C7!jbMR?eE?9Ao}6)W z0+K+eauj2a7FUyuebW1f{c#VGb|i?IWMP;1?GS_EQlWf_NLDC%Py{Gi6BJS3zQsG% zm~i&qh+riSYjh=h?Y=QK7&1JoX5vc$%)^gyfD#)fD)rn}>x4YsHOx=gaNj0q-^29H zYuIR?^4B~*^5F>c>uh)n|2eS$7rc4}A@Ripr{^nleo7^SP9xC6n&ArFtO+35Wxim; zh7AsFLAHDf@NR9fiA45aC-Mer;6t_sV2F-h1^sAF>j^TkmW;qqC`TfZcGp^dSI8+C zKY3r9L52xmOfceXZ;3+n&vLAFlh|dd6)fEF-wY>xbsqopWhX*Oul7%MA)u5*cMjqX zkw_?<@z6LlXkDDd|49u;EFB3wA&&%+q>|7b2>Bb-YcNza8p{Q(R{4%H$w%1M*d-1s z%oI+F-Z;d`J_?Aq8e&(HVFvwlI~G??XZ+6T37;LV>2NDygIXMf6e{`{cz17K-^av= 
zhM@%rD2XA{II=~eDxoj`UlN8WwzdF}6*8J0trITwSU>&s3>yd}%v(_E#ju0F<}^;@ zz)$Nr2jGDS;jJ;FB?d{RB#fsD-U*C9(7jj&BSj~Hh`!|JVl%d?=T=0Z6yr{+#2~dJ zw4pXwjsyA=q{ebMlzjR$o}^7w@<{FmBmfJfA{n%EckP~sBry?j9k4zSP-+S?!P|n^&U_8t>mEyUIVl4bb~~R{h!`ttUliT`Oo-8!<#MZ z$M+sLk{f_miVlyg21z}`ur1=d!gw1JkPe+7I_pHOV*+=dJQ2kduKgqxouI!kj_KMc z;R<^)=)#cVK*42Q4kQrqHX>1{EjF?a$IxStYP?72Dv1Mx{ZjYPgDvl&jIcUl#>>kq zhHwwF&j|dwNIng*8e<>Wyn8on^T=;9gZ7@4MS81GP%)K}v;=~m!HA|5AKHMs=Tc=~ zo5hZ09G7hCbD_mc_Jvf-aSn-VLh1+^ro#56(%uIlqdc|I+y-mFhmRbX#CYB~LA>lT zMbndAn~7l0XI(K;E^)j+Lhuceb`{ZkV`DYA*=+I4aGV13nfJQK;wCB{c}T~gW#$~K zQ#tec&71eo_y3_l&RrCo)SZahB1;H}STz(nhfX=iezfb^wN(mJJ2GQ+EydUxl?dc5 zbCeW9-jjbO&{wNZ{8=eDa9|U^pf>KNHv@7&P;GA42yUv$Ze=Q?=p-YDx+X}}#JkJF z9P{BKboaJIt%IVmpB{q9Jz`XWLobMk^}*5z2namZa!gyydeU50kO3mV?_v=|FQwOzZ$_YFTa8dHsxBr%$=keS$qAUo<|8myLYb zgS`?W$#)Q864HU2Ct1{a%84w(=g*(J74lUk=pCmHz|XV#tpNhYKLd=A9Zt8}5lalF|(FLz2>*U%yg^{8BPsn@PCs%IcB5CWP{$G;og^8C#?8*eym5F#^aT;R)a4CZz2i z@!Kv3(W{_VKSqL>;H)D!8L4|vIdrmEH!-IXiz9)%`;smb282Y*5H3MiHxSEp1C|Fl zI>3oAP(Kp#OZ?5``@#ytA|hsa6*!N`+yESRtFBb*A|V5|VJQVwEKuTP;v8FTfOO}2 zyGP@7Pp`6y`aChDzrV-eFE$$R{|@Ztxn0^1kvUCWoQsh4Hcbbc&9_Os!|0yKPxvuQYQF^m#7XUO$9co}qXm31CIyJct99 zYz3WFhloHJNBq(@+~?!RQ_j8r_jg{PJsaUru_cuN$$TfVO0JwMu=f%>J?a`ovP)wX z5ko`dku_wck%S-Af@IjnB%E=GVN)m7c3R^vQnabXgUBf>MNuU z;LEd)roZyz&dBem8cJj(4m*%tVqZ*r&&_l3Y}~mfMx7#igFR!G44w1t`>qL9&fx8{ z2@f_Tw{#oX%DLFjUcn{gSa#z`lsAJxT1D0*99bNUD$mI&3p62P?AwB}v8?jXEQI+U z&I^bkUEhk-@7b_@`*)-*MLulKg0{&+Ic{gd@S^wM{IfZl*0pz?wb&p0J;%-QXBn@> z(0GA)Djk&>l@HqL73uq5gp4X?#Ouzh*GTy5X3w?hzQIB3(O^_2aNJ`a4UFAcosOLj zJ$L_@X~%htf%h686#bQ%#AA)5Ld-OZY}gZmbOLHP3^+I`4bj(hDq;9Kg=4L z>Y#dnaf~d_%ihU6PZF-dxQ5z`kBnJxXV!Z?Rss>FC21?GA!|W>LmN>u(iKr00 z?IQ2;vz=j|ES0$6oBdnG@qhHMFgIKZ`wV!c(zzD7X!32g$hy(Xu-aU$4@G*=+39J7!(y(1GkI$v9XS7ces*Rb{OC(>nj`O?tH9@Q%L^d_G;cJn+r?n0&vOwM~y4 zJ8#fG|6=q#$?y2#C$;CuY$X)KP1H$np}UA9gV^**YAEE{^jB*xR6>rNh=*1davR|{ zjS6 z>pYKRKlXh;j`QrCH|xhNn`vbiIX%d{|CTL>m-LA)3@$jlEn?;phm=3+r}4oerKL?N 
z-HQ+_M#1kM?N8lP-Q9BhBOsTzcDJ$eR&e*q}0SYz-aJIc0F-e52gTtmWOK40J1AEg$?ab(UMn;Iw&B zLk|sgf6W}M8_ug8nqGS9R$?+vt+iX<3N6jl_4>L=2TGbG?>qIW+o@Bh4px>VR;=GO z=IyR$JuiK@-E*d8Q{#cgvMqN-ZPvf_^5vx^>S}7E+ZW?SQAO)DVJ->-EgAwnu0qJA?`{=sD~3$9 zaJH2H!@8A`%dQmXyJ~$k+ID&P@#9m~M}OUVZ{Uc3TR-ZL&SMlWkGykxe71^x)6xOm zk}Qf>w%rllv`PN`j7K95Eq>f_*}biq%I7}qO-)Tb2ucmvw8;j9tKPC@C<9X5aad*C zzMaYnk|b>HgBhxRigMnxfB!=iD~>vt5IA1Db?ZaK9Ytm36y~pNAO?>+@};I~k&#Ps z8$ODbv8zR-m;JD`AbD|3;!jqh>XuORsr# ztgpXpB^RD*)7r|n)%9~%6*p$LXrbC*NvD`EiwB|+ z1i^L<4O}?uX_9xPH8$2UMwx1HY{dnJ)}EW6({{|^QE7Ap)2-16wf8mlpatIE*C2vM zGy20ad;cHn|FNpZX6}mn*3Cyn)@|O~gpr=^{k4iTUuD{gi_@<@{;aoJkP0KTe;XL=IFG*Y!rl&Mkw~WWYK2g}egJO8ny6ul2ooiOV8}EM~Y2R_p@wkBG=M}Lt zjzoor%l8&rFo?ZDAVzod0ovKI953cjbMU@$&P0Ke6+T=N8EQb&zxAa zu0BXCd~$eg@8^Cy-yJ=EVO_uK$4<3cYyL>h$WY{kuhJ^-z_2LQHl}J!W4TTPi;RTV z)A`~8J%Sco)jA+d6z&@F-0>!C1ZW%rKz?TS$j7zKtp54F#t(Wvb~Nw!AMbnkWT)#! z^Ji|{s&RSX?b0I zj;ziay4Y`jtB8_8H?^Kc?i#DvbtnLhKfCWe*&4aD-=bdK4?k?#cf3m&IRMyMK)8djIjCGm@X5KN~kTH%Q^< zzZees=VCn6{lWFgo9^}=WjAZ)7!~tn3tLVbTs8P< z^?TLxZ0L7!4zr85o1C#)d3bwa$-_S~6@Ok|Ny~qH#>Yjs%5(Eyw5r`ZQQwpdHFWCL z{Gjo+&Eih=>b2(RY3;;*y@vKYYItv^zHI%IyT3g2?{Moht9<&hS)NlGbv=HRGD<_D zx$j_pZ2beVYfNse?cc7Q+_GgO3S(B&KfLY7KK~V(3gP2^UvmA|C^tLut;h)+S#vah zR-ymq%~)XBwT9(E@^}7@AoW-8wBgzF=dT$B=lAu!CerLQPfsOx_k7DMJ4XcD3>rST z;+gV#w|&rsTm)Sx6DKyWHrw@IUSqvC41Xz6}Wrv$C|57032yH`ffTsqJZO@O*g1 z(4j+5ojVtllr*sX>sJ+j|9@=0`cG^4mOXpy*fH1X(;Mg(H(p*frNXI@fy zyEmqc*)Xyaon+1rBmey_>Y8(Sn`@j5-bjiShd_`b=;3ZQVZ`rk`0HzjRjW?O8QkGG z9U1Awi4%sm3U29`2kbrkLmYIwunN1dtE+1u_wh};n^9job{+LZZ=3U(@&i8FmZ6KE zZqph6tkn6odE2lC*WDe|H>K6&pLRAL&|LG^H^*p%r(v|x;`|*mRYO?}ShmB5k6HdH z@*+t_k_wB9i%m5Ql}xAFHf_^AS}D9mmVCO3=j#06UJi~oZeg{YHfz?-l8Ust5220G zmgOXhVyEqU_Uw_9UcXjeEbGMP$jFF&+N6)PAnU??U&Rht{F02cfB$~7@Ss;M1~mV& zE?wWVH5Zn5+V=SkcaxQVMgdyzQqegl$^&}#?Q26xd*XV=SZ8M~DWGEXs|A-gcl=M6 z6BWIog_YHH*?p5-EiKg({lA|-VWq3RB_}e)#3Fjqfaam=8@3IzPLta-B&zZj`N&t> za{uSE>v~-)wcMTSr#JE+FB#fo`sKnRv%uH4Q&MClMlZ&mnH=zLRZPtEEouMh!8mCc z>*+NFcgaZu{HvX&C#*==vuD!(p#^JxN 
zKg(?3l!pHSu72Iv5}m&z`>)^M^7tPa^4F!D3;#QW|N6~n-~Sw_z@P5_DGI5pdukIB z*Jr@K=Dv9&ziWgz=D5(TO;jZwmhqqAR3a%}7x?-HkLb8&M8{k4QzlMq#gNr|FpXgE zYpe(u$&}4{R-X}E^+Wa+)|L}QM(pmT#P6?-i|foGdE(dtjO~3B&I5j8eN+!{I;Ub( z2g6;In5U^4RJF~TiJuM)Rl5DxUjiB3-rZZJw6xUDi@BAZwOx)(@ZuL2=Ifw* ziKnr1=gxx|Lkk}tZ@p=(zTsy2$PZFiP)MwbkME7xo_1H$QOuf#d6AhrcdqNadEp42 z({O<%HV(53vF zibYbeV$|pCmFuqfm0CW@b1)G`dCUYVBoFaJbTn^acNCvePPKR8^Isk0(hIXpH+lL77KKY(9?ce$#jQj2Ui-YR(ttY4ZS( zeN>)4eX2WPK%14dUt7>gyb1TltR-@xg=_NOy%X3}U_;o1t033=R0c-QGvhMz9?>-3Qd;2eG%(PDn#yG0AY6XX&IAz_Cw0l&vYiYs{o{lYaFsqPz=fgB4 zVc}hM@?>{f#WyhfT8n~H5G|z@A157~oZ$E*ub{P9;T9Ga60Xh9F5P|?=jSw=;%WEU>DW&m_SnHm>dNE15opC^oAJ1j% z#2>HIR2aUmL@c`w#mMOTl?L(JedcOGJ$$)*0@idIu7GbJ&fd)}DQVKbfB!fmGqc^p zE?7J13Q^*s?NgS`!($Vg61v}&%yjI*3kTQDe{w?mgv*t1`c#bdRy}0Cqz*732yW7q zWY1?_pFkk&vbUYSV*%Rh)3Y<1%zJTh6^T;HNL1Nx8?mJ69;P&c$Q#{8?&>S!aQZwa zEHT$y7Ll}}Acc#w9krRFXBXL6zOPprXii_-W+c@-<3Wog@{6pKe zU%GhFm9@qI)0P}jw}vhh(sIG_fe1Kh?8(tM2L*ol^Upt(3(lWEe=0uyuO)TX)+Mh_ zA3J<_9dDpZ-;<5H!!Y;x^EaJ4*Nd%~<^+yGY7e53Ly7j9N4}=k<@-NZR<7e1qiq{3 zqC>$C`dzy=LVvpNYv81cF52a#kta_RLxJ{?XzgW*jROn)E1BMx%bIXL?bd+0(O0{% zx{Pzuvfh#_^MXj^g|%B)9cZ7|U%Y6;{(c<$k^eXiAIvTfdXU`IbJdFVUs!N_4oE8o zZ^$B0F%|Hv-nw-d%1u6Nii0&g)Q65r z+?-ST`rW&R#LTJXRacw6`4Yo3GB!50AmyH7r|NIm?bE?LzW@_Xtn+F8 z?YXNeGURpc4R@L)mKcG;o#PyxFD>qhfSH?@Cu47KKfv#^#gjbsqzeQJSdXZ(mgTT1 z>@mw_%gi7JOChpcalMJcfS|YrgU9Cy@zzxf=)OBQ`;jud*HwI^3($(%m&%(!IBe`6d^VuEVy)TvXvztMy<@=U=< zC4O=(e?=+p^U=MMUqC&S^=13luU|`%IHnOhhO{-;JYKV52jS4^h?AHjE&#j<)Fmo| z0{h|7qZy;sGw=HKTUKr$Vi!BTUFXh9JmEBsaIa_Q5EI7lu^ggno}Wf_MqwkOv^b)M za>b3$#8H`@dB}$kb&ZS~eecQ=kW}95*Q-&YUSsp=eDmJ0hGqSyB6+Xl0}VSHSoQ6j zD=h;ZzSo55Mp?2P^{{XmQ1)0evQ5o01Qab}!E$+TY8F+w2N`U1r6xOWpo|6FWf;d@ zDN%jNSS-u*=wZTPbNb;sW8X}oe@fC#TvF0a$SMma%c!xrUlZ9!&#q*^rLyEksLndW zMW{%x5$uK}C|Q?mMA}x8Ueg^q#(5B#F2;S#4{=H!CEb|!M`?$Y4HTVU!SGIw{oxtZ zkzuSS4Yf|&hZ$6hfsPj!y$eq%gJuaMn<`6DNIA2!*98SBQizc{H&}*mTfe3zdojgc5&M*vlrKSxPa}5;H+tNI{53lCId% 
zViQ0ocjko3E|equG~h`;e0Qt|Kxn_u+?K=*mRuNYUnnv@uijq9f_<@`(rrwtD?uwG2x`xCrwRT!f6S9hwLYj|`jy8x`w|1@a z%$YMq8ezC*+#1C=(@d|+9<;_7JDhe)(_g<~eY>&w1wfe;(uyc0Kp$a+g|keU3+<5T zX0S9~$5a!1Dl-8nB?jU_*%SxZfu_cy{09Xd2CD>~Hj<&%>xDw3Nx$3?rNX49or zD@rRW#wyj+qU8G^^r)c zf}10gY#TXaM#sQoGs6S${)s}Fvz41HU%p%*JFY;IZexboF-;@F^w}nkWu1{nTVBwi zL+xI?cp?0=}r4;?UbE!6VD3CM+pwi+NQy=qe*2x3wMoCO>}- zu{R2=YuvZ*x}!(CFlKTS`QX~CS8eGcx3aYjv20y++H1}naZ=48#u33|SOb8?L7^EQ zJw7~-SE($?>3-&8`=~#L{n&-gVKrFOU>nA?dMpj3MQygLaEgxB?6+~p61oW+qCFU{ zKNY*vvG>%r1N;I>$cS7`ex}|uY=Wgo4o{yycLTj7s_yc!-Fml$zHfdE>ZzfQri!#| z+2s!vkvBhn>7cJK3l_Dci)lTOd+o!GI~b+bW#8SnDVO>{!KgKFK5m=dY)$i)p~}~A zuWZ@98{9dr(5#vMvUS)w%1h9(yF}dD8UmfLmKfY@th058sq@meuXsj?m!wNeN^US? z)R>jP{*Rfpy8W_Em>j7!d@Ou?dNI{D5S&wxa6Bw5%x6Q}ehaN5M+D5RfUm8a7m zZb6`tkq+A*=w|!Px?E&BK zFo~Xb+2_Mkbsm8TpXZE-5j}RPav=s~rg455AHPfoEg}S2<4yT}&meqW!+tCK@;zl~ z!J>LI*iNyK-*q zzH>}1UbA&;Q!$jA5D;k@tUN?Z+aa@q`FWy6Zed}pQ#bQh{A&Zr)bF#dlncJFYtaJ~ z+*s1;zu1WSJg>O;R?BrE3)M6=gK581+@xcEgQ-`iIV5o7cL6MlIZv8&{rYve$jHc$@bGblcX_HKMyN_@{gXotW|pZaKn2iyumMr`B;Z3O0fN8eTQz83=he{~3tip7n>6hM*5^DOFzMP=OVhZC3g`+ihCqq1psnd+-^+hHlTEO^M)Roj<*OJ1**Q`|%! zH6+68w{NQgrtgejXqu$6@cW=;KWh5nR}4eoeXna~IB#LA?zF_N+rPgpXDn=Fd0cED@;dtcyB&@^ovj$D1gssn4aQTW$iD))7}D z3FM}n8ew?}ljy~S1U*ATW#YzAG+dR-BdyDSh}aDtTuW;DR#oL$Zrif@^FJ82-yGs= zEA8&0xCjY}joqkY-j;ObOutFW%V#V`E%Sf@fydsx?AdaaZBHJOf&|MlY})fm9=AI+ zyL7PJw4MIV0~SpM zkssW%y_%*TG{2Lnm@I8P{XS1tFUSI<7&E<&Np$z{K@`zFsQE03@EfkhKP{F^bwjH1aF#79ByXl43ul{$8ssH7l zLQd4`WxO*C;=ywRzMlbVblHZ@Qy!?5PJ2%fk3biUF8-4q6@j{9X3lEr@amY$m9`X% z>5y>hL@{VN!CeucL=)?Y>+P1!=Q#|odv$5a$>+;B?=1-J^25iE0`@>t<`NPW8#i8? 
zoIGg2fC1tbp1ks-m9WGxgS1k;5keqtdc(-R9C1wWH#5) z*@U4>e#xg7gCSH-^X^UFd$jgk3(5WaN&mfmc?iEL(R1*i4dCLe2?B$Dd~?UauA6Ri z^WM6R|Lt#1DEHBpe)FOcD>&D*@X1LXHs1X+(}zP0yjwHi+0doe|LYxA3g1!pc}t_; z^s|#IPiAY|cKNxvfi%rKPEVrWFy-2{NCT@wZXy5eTh~qdn+w%+W@aWrZ3Tq?Pp>yI z{l8&{|96Psw8=;V$WoBm=N`|bn&!014cG`hkK2!=i1=xT8Q;lY^TwdA$|3|q)S{;N zYAgSK7${UH>MyKZW@5s^4wNUGD2?BnHOsn+FF=xrfDgRe|Kqw7>bd3n4Qa|~Kpkrd zyKs+zKl@SzRs14pc?(MM2btBw4O_Hmq1vjKB6Y_NR-X^F$$$KD2>4Wj^5+#6Dv~c~1e64=mR&+V1;~T}t0+RW?%cUkn0=u; zMa}UZ^LC9;;IOcmGuQ%G;3J0cT?b)p=oA}>C8J%}u8lr@`m{OIW41d2WD407#4ZNGoOHM&6Z-o8};vY7VkCusMJ zK1F{xzN8!jtdpno_MM4yxjR_jD$6GXS zzLrYhH5AdT?DJg(#^^Hs#6d!!DM!XT*3$SDNg8AbfE>mDJ$kh$>s*O1vmVB(;~d#{ z*b5W-VCLe8b^yT?AuJ4m7bz721B^hw^BY4$>jgv;fyXAsE5j9#&-!yTZ3w>}MP3%=_06w@rTQYR zvQk7qafK8qvJy4CD(?Ns$tzmIzih9*?47UB+{pRh!yO|!THy_&eiRh$i^ZLPtx+j0 zda2hidwUx+uKO(P<6q-K_s;V(Sf8xyexP|U-3QknK5T|$h^i10GDxb|U<_Abe}XA& zVrph4C<~5@oVNctr7rip-IHntG7GwCfzV<>=F|yU$7DonKy2&g%@w#j>Tq!+%EiSW zTQN{PlyMq0fI39UHJdgyVKVokjq^EXOGvsbw$D`hqOxN>r*fyfIqGCUK#j9kw&1DR zQ-0-z>Kr<_3eSi5G07NE7~w@1_U7%|*RHIqZ6OLLYAf$AZ$}DPE3S^s6P;+OxPsQS zkPRDBsc5__KTQ#G0>#XA66WSzyFwv91j4r~rm0GQ#K@7Qh4>LJDWEdQPA! 
z6Z~G#x^?Z0j7Chq^fUDgu3ibnSyQsIPEImnZq&rK&6))f^rAQgVJ8zqQVzA_JgZgQ zJpDm~rhaUvA5i;se6W}7nHk4G{;Lij?nLfxc2iYV^&Yt}!ap6XiCDgf#0kvkywH1>FEGxJD zSdrAauXLI3*nj>TPfmgq0w5rP3a9S6^q`g;Uh~)5h*oF z<24I`a-kb#OfVoi<^Fw#{6Vjh{Z%B$k=v*DA3b{XW@OJ-_i%)h;G-n@B+*`l5US5mf7&nZX)R{m&X@px{dBN;o) zmsgC4Gdi5QMZnnm_thyn*YR*!N+xuxv>CL**{e0MRkEOo=egOhgtnB9zyI&Dm~u~@ zpsJw7#_y-j zpI4B`Ed|cqzaMI~uwC7>foQ*Nibb31UH`Z1I-0atyLXfa!f|X@4u-#hB=ecBme-_R zVVZ?57YPPyr5t(6adtaTWy1-iT^EZg; z41!XfKfh#hWf!1X0GcF~Z4e?|8VpI^$B&H|^ix<=BpMA{ea*vvpxfnv%OngFg0422 zc+R$~aT5w{4u>P?%9iSAJskniaQq7Kod5F7w6lp@J288K#DxR?&+Y6%!Kk|(u zF!FM9B}qI+NM3O@0&=0FcL_YUOLb6P%_5=OV443$k3;mHHsWK#y0~X-)|+)dDmd$q zm{%9VV|WetQ106qznDAsia?htJXS>ZIHQ3BwOOO%6ON-!2vh$8PJ|c8&>=FfU27J< zof=|;yu7?{AUI$4GZ7Qn9lk?3%1NLBo#SZ_ll_-VS9x2Nhnx27X)ScR9o=2v5uD>z zfDRhGcZs0r*45Ru@T_7}NcF`&m8m4`HI8j4`Ru&<%zt%k^hw>_y*yWQ_`D>6C1acd zfEbbmXifQYx1Tw$+L)oGT6p3HV+~jUvoWAkJ88n%zOojq;YG*Z<>jqHsxrJi+mzv$ z;5+^H?d6eFvlg<Dg~0HqsXol4Lj!9`aQvS9pfP{eJ(Fg<$Mrx=_=(ybOEhe7ngqNC;)fA zMQ;smUwjJU6Si86EVFggOA)UO|Jq4K5QgT9C}!BcofHU5^L&wJS!E3Zso^b>+PPKr zPM!`MU}I%vWk<))9&6-$O2XRm*P?8?a|cs{6ns(qGKY0(+`D%gy1P(cW6F3@f#(nc z|B62;RN8zwC?|k9j<8?@NC2Gwe!HoyfMn(+}Lack7l4u3&3QMLuz3y&$La zOG>7)1{*eS-dwAPhwc6++HKo5#v0SUV@E?Hd$?8ty^JKX-|QuvswY_hk-z+YI{FEm zjNTS@(VC^~jl{Z*;qQkkAf{!dB5#@ZR4|?Y0#W>i6rF3@nlI*%p@UR9hTs2iaoSPW zaojr)v|q)CmLffiks%S4MGrcqSHRMY?Z=gfK#)21QD%xsRD&^wVIsJzZA1(?Lp0c1miBT$ThO-P$(F#^xlB2_r zN?}+L&uizNJpvP5T=vzFsecP7$0k3iq-8f3{tS>Vpq-$wd0-S=QFN%OS;p4n9dO}@ z7-6xb>XPQqpAYEL)76#mWD{l_OqKMvW}f0qK^kclaGRv@K^+c{(k5!>gJ`I9ud*j2 z{v30An3Gd8h8u3+BOXF^;8PF5)GDYod|4E_7Dd(@)P(c`qHP2x`DeWU;Gd{NXDu%* z$WVdi7(AoI4%qm8bB?vEr{^yBfxho#-h5=}MfCKg{>|JxJUqe+%DyNZ(WogXK7abO z!*^%fk#5@;Em}0@*^TGVCr%uX$O0uL^dR`fW%_4RN5w6%5ER+GAF$Bq^OHKjJVFZ0poGvZ&; z76CUFM5Ynq^nE*KB>P;$=68x}K-d9*rt65#c00Ol#4M;P^cuMTg&)(&T>M))3;ncL zb{`lj;R4%f>aEYYNXo5Sw~koS`OOZz|MZcq=777#LQC`TXb&70dj19-^QmzGu!7!S z-;ZU@HAIQOMu2d|AHo+m3K**1Eklbd2@iN`)zs7ivBTt2I}4eP;+Y4#NUE+d>ED=G(d*N7JO+|lixxZFlRj*kow0F+ 
z!YUqbf%8dJtfilNn)eD#F!op(bp6sovnI^7*$AA-#XpfsE4~`Hi5uev`GAtvP_$+~ z9J=4R>{B~kBK*0Z_V2X4mo8mO$(K2^h+Z5z9S%9HWKCZUQfhjN-tCwmc>>S3>gpq2 ztruD1Z_Gfyn`2(?ebryL_|tj}#IAtCrSF?ak9ke2ZgS0gZXUJFG4u(f@n9#OGi^E@A%0Ot1?Y3#- z#vv#x)T1$BKd3(yHh-O^G`7raOpqa`O%~={F-J88icYp8P>HT8QCh?PAF`~4td|0j ziEk8>er|So`DZn#!C_0!K7DS`vLx!`8h}A!W1CF4gXl~8ZxE&0PYJAK-u4jZv=j;? zdK-p*`}R%bPRv(7p`@L`|16;f-sn5*O*CAJ6bN_GJXWV~-@b5d3f*0jtr@H1DCWJY zJiKDa(3HW9*(aJ`*CR4rVZ~JVg~l4HA8a$JJ08 zN(X(uG^B~HPrZY*P)^LR!A#QGev_ZUZ2T=&pxDpMBZ?%f3fnDEbS;bUAaTKQd)+$Q zL32{UyFP7{e!`~+AHu4lqM~rP2wxMbS?{}EdwS1N;DVUQI&4nqh`pxja56i4d1{ca z4>|3Jx~J04qm3q^LP(o&_EqjmfyX8&k=tV8cb?tDPm-tG)vjdp*|`%$vEjFpE?*Wl zouJ_0X-rm7?%9Qp(n8q&f`c1ylH(umeA!ml=_w&t)+?Rf^m|Gc@N5y!r(jK#2rtD0 zPKJeq*p8`>^7k8N5aN$6F`4xM)W_ay%oGDTx&w_(Oa_BEa*@_ap#|TJ(kk~IIG_Xa z^U?l=8IwBg-M0__haxpi4dU3nCr|E<(YPEF(?KBdg9mjnJ~tzvGzgDDMP(>is}=s*9A;)R5Mg$ELZfFD~Cz)8h5V1{gQnXLRI=39StQQ8RU;*@y!l4VvYu6~nTwSkDw)+Xz^u4eZiZl{RF3*GN zNreO|dpHlDsT4@ip52x1?!;Wmu5Y+m^|od%sQKpI#>_{NeV8KDDBcO|=%iwu zF(HDdrl-48lYOK3yN5GH*iG4Qk7w1QF*=`{r4DJXhOYy&KrT4~P2@U~!fIj~EiKNG zf05ctDcMxDj3r#H4W6xlR%nxHT~UQ>_xGQLh*Yv{V5r$&ia?Xjszt%T$c!yqh#>PhLQR{v}j@k}LOK{}y@6FnD;OecQjZ@B` zQwdc_l2`##g2ozM_A8ZT!v?_%ZdK<=-Sb}iAMKtzmMmwa0D2@Eli9`oe3EA%ylBuL zVIYAN6fc-d;Y%jX3y$ z*Zn_owg2l4LjGUTS?Y83ufP6x4EO*4rTxBh|EKTTmd8R{6@oJqE(#g{tB>x@xAbbe zX7y?x6tMdZ*ZgzcIq4@{x`b>*19{&kiL@I%;}?_ksn1q9E5DG)T0mm{%k%3s*A;lQqg z9q39pmpGN;TQEqdW8lK@=gW9q1&LmNPv2UG2DF{|cY8FUVI^t(8o;uYiK~zs!iJ4U z=IJ@lrUa<9luV%fW?xb>lB&+2y_t$S2Z0-%HRivMRMvzY5TRK>ql-rh&1AH5z(6x&U^LXW`gd z&_7cu3Kmk)3XTloMh+^<)YMdrb#lHEFpzcYbVC@7f69+C8Fq=^m}({Zgs|-j2Z4mQ zK;@?{Dg{EN2`q~C3iT2N$TRh?KKF1w8HYr_9)9CG%-rM0kA<}cE5?z<2guW|ysfaQ z2w^_lqiAyhpw9v!~KE-ZfBRa`W*C(KO{iZ@N0qY*$x?CH}8 ze~q2j8~4}H4^kjR=J4<1f86Z$J$ZZw`Z*XCA{r3pdU>JSili&zYJ#)lb|y9+_WJ#M!`lA1UaKnsGxvoB#_beMRfHij^8$2FQHhc# zQ=m~!oG+n%JIJD9(FOu*%WW0V3UN*Iw^ib-5AcEl^o#Pu(i~CvDiB3KO>~@a&L6Y_ z+P9ie*ttJc?{R&$;}mscu_90d=~nIE|NeV-Uf?thOj=r6;@;9Z3zWZ#ck&c0kQmc| 
zI)4Lf?CRkm4`l>_CYF`x^+eI#$n)&-H5)inl*3u~#yxvlQSak(2^97yZn^2wvi5uT z?tM7#I(3j}0D}<~cj7s5iEx87IL6Q`&)QQH8xNXj?7yXY9(8Th&7TPy#=(J}m2k8-q7@*f_b z)It*EsJ6`^17yDH3;XtIOS*b`GNPd$p*63dAef9mEZ2ja=WYtN6d(5~hF;)qwD!v3 zw58s3q6tC(4}q9`L4$=r*PjV?0*)P7j{>T6sz!6 z+*05utwAd;J~tIEe-u}D_7Ka&g4+t;WbF6ZHF%j&_1Pb%fNZ=GX6xp}BP&Y0E&KO} z^HANgqKAp_D3oA4YrmQ|*caB^Rq*QJ9%}_@!n>yP{Dek|7YrKEjRRd`CnxkkQ4|$Y z+{!}|$8fg7s3wMBKs1S2E>MGv^lmA-SxnsIv6ed^Y!DwbH8mHJMMM!C<)LsVn(s$w z&@j~D7YQbCiIXg|+LRpu4*>k>b4hp%+J;hQp3b83}v!`Ecm8G7fVGwQcedc1ha1#uIUK>l4oaaB(=tk z8{1{Kb2%Il5dj|^`urPa*F_t;Gs2J-Ql;1Q>YZoawb`HZpq2l*Rr*tn6-&8C=f#~JRZEG{fGAlPiX zIse5)y+{3)R~)l^Mqr3OLwjY{yC#}Gs&D?p%I;YAjGm`3(r_3ex1BePtMj4cU^SX# zMiGLJ`d@jI{(?=n=!4Bm?_@Z%xDS+Db{}b*v}Wr!kc1q*F~5_3_2z%qAB{3+>PTWG zMfh^=16U)nti#H4tNKupwSzjM;;R=XBtDrMMV*X<+|gk3*I4-~_vWu2QLC|;#Z&@1 zBONBp#5AG2Dd^rqVs?ywZ}L=xuV{5Z{FNjjVMX}9=_&W-PxN-XB@g8SpxBqZ;?=qaacIw|n9{ef*yzS_%`arJ{VphDVS0g&c|4#y zRIRsqg4U}54Cln;%2%<&Tae|GDiw?Q(DaS84 z)lCuSb}!Ydo5HltKm_gyZ=tb#df;Mh$+NlDCc6&G zOu#u%N)W9~LRol}>3QJojuqlOq)}e`h{qOODY}q|Pn<*?_tg~Y!RBAx%vqx=RMMW! 
z9ByF9{>ciSPJR*{*QjQNbZ(--8BGaU`J*P`VaJnad&DLxs_NPgllm>iClGk7rTjwu z1Hpr*nqTq8BzB~0Ph;bZMf)7mVL~DW3XhCK(lTGqKi7O901CLsgSNZHV|Sf1 zM};gdT_w0~Aw}j7`s?K74I?pcfxUF~^({X>rAVIs&(9mBkx7P@=r-Lp=jDeM&IKQp zEq~0C2Z=LVZwm=g-ZAyl*xz*tU)Ss4$dx9H$$B?*s*Sz< z$*VnE39bSnLry|!(TqKrG7ciLD5uQxyYG+yb+ZedD;Wv1Z%gDU;fa!r8G8@&-WH*6 zj9s(McRdbzl~s<}T6xBF!z#hiL){1$2$TBjI!D~9EMis`LwC&u=Z55Gk@sS2iWJM_|6a!f0>l{lET`(^z1J*^>_9u4*vxFsvm*Q5#DH2&>Zv z7=pgGiB4nPV#`x2W;FC=MJsm=CmPk)Ajb8flWFUrD8X_Pfqnd})6IfNnt^p~vrA8i zagO&M55$Tm37}Jocj#Th6mKm#=3uf9tcDI%2>6!QF!;=i3NdwE9rpeF{9l%q5y@`F zI4GrDT7&Anf=u@}BM~-`VjT`CF!*-M$**ea{vrSJz;r&)TBu|Y3d$b>*7#ADUeFhjY97Ev(1)$ykA?3E@@DM z7^TFIbsRaciN}=Gfh)clV;KtJJIyDyp?q-EPa!j#z({j%rElHiJZMvw&ipa*O;6Wm zeZhEiZXxzio^JWHyGf%)L+}>};+R^e`OQRtaJwl3beNilhIHg}eV4QFE!s`q+@-O* z0Bt!dzBCFvX48wuWMjCsua$5Gyr| z3EgwJ#}5%BzCj8^_m6v2gk@5cj(kgWDS_sAuEJc1;?R(ya6{|9jj>WtIdtB=(MT7Y z3)3m((JQaX*AGp*!EB`hot4JckVRiN3{CR zYXelc$CzKp&&1LB1|bOMS84sAH&zH#1gdH7?rZR{y7L^r(Ssq5fwG)bhr}tYDjqXt zyhfX~6h>MvuSOJ&yuTP=0ClQ{4Wm4jhf~)#f~ZC)v7B#s!)9MDfB5H zV12$3VrXYnu3>r{;CkVS+95VJcy$F0Y-?*9hzcfLepn-N@q`FKo=Zb_&z@)XpPGH_ zJSDN0=&2#SM8QZf0Anx^9#^OsXCN><)hm^_Ph|QTgi)b5LmC5D#mFFfI`508FzG^L zp_VeyB{F@B3x+n4dc{42QjwGB&i>5(dCjBQY34gS@fJ`EQ_KFpJa@N-RYTK>iKn@uQAWF!3gYwS|9h)i$E>G(KlMz3j zBRHM#Vsc~jy|-4EY{%}TM=@bhC(&F3+~A%I2OQZJgHZ#~LkUkNT6v#DZufhZe(KMY zm3gZtPxWWmi>-b{+LW6k}s>1z0Y0@On z^3W%FYU&ZQ>giOw^I!qN=gq!#zfQ4$SEjxfu?oMMbNp;gQ3pfpM8S2mN}mZ|#I+ov zq0w$SJRqkv>jqi({7l9KNJC2xE*Zc)F&+u*M37L}oFFAR&KDUM(uoW;U+TfA8; zBioHwe+9*~z{e*QDbPpz&hjoWA%q_(kP;1h?9fi$Xt;S`1femBb}Z^UJti+m%y~Hy z-|~Y{3t>q#xl?fHs|mvF3uAo#w1nArDfY|Z9t;~+-S4G^2E-eZx$#-3!97C<0`?IjTdLWmle!a03LR0VDavBy8F+WYHbXcWCuc7UT5sWxVQs=IDuMoyTb|JLV=z||u zxZu^x#T>+7q&+9B?yo0e$VdT<(AT!H6yc{|6#|7AOoEk*!6cKPZWuuB_C7+qWTsmOXsO2(f#>U||BG{aHXOzW*8g-L7}<-s$Isc^djh3`~S|r}Ee0 zpI58tr!N+!oCAI9;1z{lf-!0^SW{pnkdG+tgg^t7e^kLNI|%%a9o&U?kl%tuU$bjh zOVKwAk@lu|=|gezf4pQFPcFWt)AHa=Qnj(LjU01Uhxla(l$ay)nGMp8j)Cc-=odUT z2a{Sb9441Z%YwH?1ZObX4G{LDPnGQ3qGw?e#<WNRcQd@=O_ 
zr{wjtv<8S9X+Y_PNn0oE`JQy}O?K_4vrlsT+ZKoxAk=(ZhK-3&!i-6=5N%^6O(@v| zbiGOMEj6{cPO>Tkqb7!%h}8o2VEV5a%&hB-8A4cah5o_WdAIUB^B5E?6`{V9QwRRA z?gn^d@U;b4Jm#)0rX6O->2ONKHSR#O_buL-W|WxaaK39GVdEOcel9}TJ47iIu_+U z<~mRy{^j?C-5hgix+pB1=FOSYoRm9Yc47OI##(W-y6#19e14`th5KDKrG$mqhRW?C=7EzQ?i$;Q=lR0O zvUY|LZvjykNzC_(1F1n-AJTN)%?;1#mYP)>F(ciH)Uie{C`mK`UZmL?Ic42`I1BW~g# z@0vDy`t%NrI*mC)fL{+)EnFvd--TS9XNqrY)AO&8C==M8T{gG~1AblCu3gh8*T2N; zFp8j)6hg@vcl;~|R<$v2h_Lst?cv=OF>wTvFbKNN8<X3v||@1%nK z<6P`(-l{o{6BI`(z(dB3L2vCCYc=*nB?c7b^ff#hH1Zuka<~z5pPm{#J)0;mKwR7j zqDQlnJgTRhwmmMV#mLO`Y3zt?Hz~KeNFh`vab7DC4mdX@F-Tt*;mn=V!58aPzmD3|?mfQab$j;90vZ4k3K4F4w zFTVo)o%i8`k^c+^e0K2~?&o*W$t7R_lQE`tehf-P7V2=^9suX5YEQd$ zi6Ik3-=E#QX_x1`o5KPevpM+gK34kRWmL$@-}@iFTSnwll2V>Mo8YAll-17?YE~X9 zq7(vA{$?VUc54ioVkJ+*HcTxiIx5pBG9v<1JnH^B@oC8@vbn%?!jT=!QQFws)FFP= z04f?ybOt%KW`aqS7$hhr;jsSH8PG%5^k6Zf4vlm4$9|pkf-trJDfA|K<5%N2*CcF2 z0nuMyDoJk2#~k;p@jBtNsSGC z%R7g(JzH?I8$Il88F2*@fAn&omW~b($VhBskxITcT*Z(DNf&OkJK~l;(5*|5nm$J) zeqhWKVn~n34L;9MU^VtiGpKOhF%2q$4i7qpZK=X(Y(ZqToB*`-lcerLHT9=R}aXU%-YnoGm!8&i7* z+A-$qtBc-Ni5FoG7l;`UMb_6rAZc`Pcb<})M5nf8HugXV1`^6xh|ULq>Iji&^n&1% znzLxf+xi&14+>pfZa`pQz!C`xe6tIHh^y2zndZD4ghV0P6Dx>bz?fu72e44Rc+$~1E#_g}E z8C%hMnp17Entom1ogQ^wJ*fCRlOWeDU0lrVm6V8HO@+>r2mooO5pl`4_|MVi9EX~B zAqt_^((&@~eF8jt>g9<8fh@*3UQARehuz7l9>>-__*FCbLrEQyjOMGl5BWtv z(dID1hTk*pj%h-pe-?;=sqJ9m{Vu(l+b$WRbEkCy6PS?Mi6LB1^#VO`A9G z#De?FxE|R+5$n|S-|5Ix_L9JvuaxpK;@QBZW!BYe${{MV$|xMC&74U-gt&{H+oeus z3yzl;O#K`tsiCfoGrTqE(&{5BJuDT)R7GGz>pmm$ec3FN^K9nu9`$QW3~ZbC>T>5P z22LG>d8G8di|?1 z6PC=JDfkwjg?W%6b|0r+_KRGwIx@DY20c_AI*g&>wvU_qV@fHi_PeK>YR*!chqgL? 
z<)D&MgMp*Y>NKTQiW(&Tkj*;qn4(CDA(B9**PWzNLXD) z$9XMWc*bB#;yFoI97H#|f77&8*zVlxLc8;;MoICdzZK`ULm|mAk4Hg}pLp>e_t7QAG4=qP0?u`rB zx`{zxg4Oit212$PK$q{_7Z)9{mreF;t-NqeXw8Mtnv&<8$NXuVS;}I|+Snu^^VTtS zHelavbvKTTJH8v~?U>Wz>T*wqgoV>Rbs#5t*%#jl?bB34sHjCR#*S*Etqp|4qal0z zq(Mf`O13H`?$Zt{>vD*)Gw$y1I%V(puh|K@efxQ=oL{El?met<$0QCKmUJm@Bitvm zhIh9`jT57iXE7WWf5<72rDqi)kBmeoU1offsxP}y9u}b!kDIOykQNE zE#tfzNKBSZBlivyTWEN2L*)NADxn7zglRZ;gXcZ;@dil_7!jM&8DzK8P<9xU!fiwAJU zX}fPxR)fGYQx03PVxSFV^~3WGezb1bY1Ceep+nc6TY)Gp1(I_jftPk41);zW^gORO z@eE^gMRj8F_?;b^WyE_NHZo!m{K5~zr*=E(;&g85Uh2l)Io>oh9inkpTKHq$t&`*| zxs2+m<1g$ZKqyE!AjRMZ!(tA~om_oMw?FtM{y=b_K7FLk`IV0I-rU^~vGS9ku^mdq z&~nKF-u%(y1jdjFMqX4LbAuksV3%#LSIDUj!471 zZ33g#j^1SYLz67Cb8}s#tK5ith%QBw12ON2ZYsi=DA<&}psk{F7Ey4dEXna-hu$KS zQ9UUc+>c$=K)4(QBM7%^m>-9aLT91v8eXK znKLPZ2k=G}cZ~6B#n3F;n2pKuLYe1)q=-AkI8Yk{chZHM71#~v>XcJj&=F87W-ih} zjdB`}@9O%?ms6hi<)=Axt-tWYiyn3Wli~Ma@OBh0Vq|yG-gD>}9BvTt{^s40$t$Wf zfF!}^y>M^*560dDp6j;%8~#a?mfetwtP)yer&J=5nKF`9Rv{#_BBYFjWHm@qBq3W$ zR(2vYJ3Bl3c^~RL@9X;C&vQSw*X!zaUgw$L_r$eROPT)nNY%^Ge()rreMXz{FeJIu__ z&s=t#>I?=i&5TYPxH|XTjTu z(0)YSf=ZF%in_U#WY56#dj)DO_mmMQ*ln9wC#L|lPUJF zML4^&okZ1Pv8jly?O=JvKu=FBtdv9OcW@}KqQC~DzFFgxp?fE8V%4M{5}e1<)T%D_ z9Rxsj6oz#x?G}K3Bm#>_Na0dz{qzm4xYgUb%ZFy~yIfAaRDDeJ^h`YDnsVIZ9pf!E zKs1y4l6aEC-G;BZIXOuL5ll-aGoX`I z&l#Qf8{ZR#ubA-L^_3efP#2ipD22s?;4%0}oC&M=we`@x5sYVkZc?{r&TnWG;vx9Z zyVHI8XIKl<z%fI+lXIolXp*o&vikm7e6X>7QIrBAtx37qR zO>-0f)J%9#&~vNLckMViIXijPMW!|=A4oc4If;dF$>oyhw-Ef8~{dF%T>W)S}Ci?21aP z>kHha&lSXGWo6aV&RJpm@xQ;;^_AEmT##tD6zB<|itYG46|Vcp@iGbec)1Wi>-ahK zPhNTf_Ms~sg>>M`WzK4l@FVyG0+aX+j7>~vDE8BXkomAbeafM@q4d%>$*H?bvHRB+ zpL|3t>#wx1ldGIdKv6<@>+G$KXxr(~QEZ?2IR22`yB8@ao1CV{Nc0mUzhib^)8><3 z{TFnUf(x~t9A1{(ZSam0$>(zPW#$V+B|yeLL0O3A`1A`)Chlb+S+BQQr6-m^k`0`) z09`B-{il|!BIzym^fza@Jn}Xe4{?pf1w490^Z3WAIoCF0T86J)p8E^n0DF-ORblj( zJ~B}XazDy^sK|EV5h+yw`F1O}TR9amu+cQ2h9e18=zyMLvw4q5AfosK7!L-Q4Ew2i zpnKSp-HTTM6$KfA0MPi*b%PHuIz`h8AhvUcvj0JALA*;uA#CoO7x5duR13W 
zR@D3s@})L5?iTl@#*2l2dc*=9#c>4r!&vKqO|lJ-sfjVZQiiGI%*;qUIw_QfJYNNN z){CC1af&bABIT?lcZ@jJNL!AOVg$X)Op6r6lyY5}1DvCX?<^CTY~ z9yEmKKf|RE;fgC#d{HblNl3F7*9zsIX3n-Hq_D}O(&C)k5?T9cpnp{=`~d~{5;*4G z0AL~$?7#|=6xCzCZ1+&9^Na`1Eyo~|mB|8AYovPV-_nY0q*7}9Hru~dJAhl7r8a8f zW?`Q`&)b}>L(LtJ2bqxloLpqy(pj6aYOYsjWK?w-O2q)~80wQs-0a2ca8miJ7Y3{& z85fw)M#xAsMujTp?2n~Bul_{6Fy~s_vqVLwSdB!p0iPvCccRRK!2Q7>TVS;sm$lA4 zn3|_2QA~2oWo(<71;bROz3g171;J4R2M_W5bS{eMjEvlH; zzJLGz0i}qm(M-%X(pzXMjaZW2V>o;Wq3u=fgJF`2qobOkTfvwStpo|VCpQcGN?OFM zzhdNdNc)p*-Uh2B=xB?5=sK;pHO6Y>k`hK?=wsFZD}jaOF7SLA5}KN{weY>=7tHe6 zs|u$M5h9QTUOb0aF!SKjqjKD`R%k3V8q_2F{Hj%R31y}_Hgcawm-VtU5sTp4-NM05 z65($^pg<0F)WvIXv2szVG4-;HEoVq;qz~b}d}82xiMMEM)(fVXP}Vy;pGal&`z|%O zDf*Z8Ia`Tf0%(&|9?_Vh39+|m$h5bn&aTM@pvFJ#M z0VF(RjL7SJzJ;|#mpOp6PJMZ^h`3tD1yGujixx>OWeReknp35L4Cvn|xv(^- z>0A?m01euX{ev2tu`go|*PK=#MnFhU+f5ihC23PA^5GQwh^pBt^9?53^D5}AolVuH z`~cpm{&Fk!E6|!9o6cE4r$e)?Z_hS#S-Y;dY<&v>t!(-7S?owmKHJr-L~lGJ9SAV$ z)f8;Q7v^t%-9B|@hFm4c1v+CDq+bDsk#c1 z1Vn;LVt%LwyAhBWM+qTOb2F? zCFAzvfbxKBuYwNW9YJs>hKrdxmNS?a&kHA|Tg$F{dXxaNND#fE<9Mqp=jdk z?Hynw>!>kN@XQ&3O`8%pVhYxDOcW0YJDtLTxeI6{{+#JgU6xivRoL*bNGIXojxl2v zum{`vxB?It$6O@p<=MPA_Ve{rM}XoRBw+Jli21XVl{mZQeH|fN+@C!^ZR)<&4_}cu zr-4i%`SJO$*}dRX79->QY=)h%Igw)ZmgsLz50X}0^d1rNkX5KF}4__wi5v)iTDRS4Vl8@Utwcd8+up-NJ89B zn$dfUA-t4acnjY#+nwGBT`?s3RNI%5h`eRA%sk?wztE78Bn%R_eMxh1(W-2Qvkb~7 zTEdzWH6jpbrc5Us1y_AH}c~$o3)BV_%zx`N`nP zkdr|Pmo{VeA1pUu4ZH1)mWtr%CB87|dnX~dIALc)-UBnqHv-L~f-w`Fm3;eeEB2hE zF5C$dMFXaMOPblqCR=QtUm_&V)vtiLg*3P59YfpgLLJXx7IlK7#M5U$2>y_sa7vSO z=P|8HlrcoUNQ73duVjvZdc0Tr